Merge branch 'master' into utf8mb4-in-mysql-client

This commit is contained in:
Alexey Milovidov 2021-01-31 01:37:27 +03:00
commit 7b8589dbde
128 changed files with 4407 additions and 7569 deletions

2
.gitmodules vendored
View File

@ -184,7 +184,7 @@
url = https://github.com/ClickHouse-Extras/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
url = https://github.com/ClickHouse-Extras/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring

View File

@ -84,10 +84,12 @@
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS
# define NO_SANITIZE_THREAD
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
#endif
/// A template function for suppressing warnings about unused variables or function results.

View File

@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
int sched_getcpu(void)
{
int r;
unsigned cpu;
unsigned cpu = 0;
#ifdef VDSO_GETCPU_SYM
getcpu_f f = (getcpu_f)vdso_func;

View File

@ -62,6 +62,7 @@ RUN python3 -m pip install \
avro \
cassandra-driver \
confluent-kafka \
dict2xml \
dicttoxml \
docker \
docker-compose==1.22.0 \

View File

@ -53,10 +53,12 @@ function run_tests()
if [ "$NUM_TRIES" -gt "1" ]; then
ADDITIONAL_OPTIONS+=('--skip')
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('4')
fi
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
--test-runs "$NUM_TRIES" --jobs 4 \
--test-runs "$NUM_TRIES" \
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt

View File

@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
`EmbeddedRocksDB` lets you:
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `primary_key_name` any column name in the column list.
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
Example:
@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Description {#description}
- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->

View File

@ -428,7 +428,7 @@ Possible values:
- `'basic'` — Use basic parser.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
Default value: `'basic'`.
@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
Possible values:
- `'simple'` - Simple output format.
- `simple` - Simple output format.
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
ClickHouse outputs date and time in `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
- `'iso'` - ISO output format.
- `iso` - ISO output format.
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
ClickHouse outputs date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that the output is in UTC (`Z` means UTC).
- `'unix_timestamp'` - Unix timestamp output format.
- `unix_timestamp` - Unix timestamp output format.
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
ClickHouse outputs date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example, `1566285536`.
Default value: `'simple'`.
Default value: `simple`.
See also:

View File

@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — Length of the sliding window in seconds.
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional argument.
- `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can't exceed the Int64 maximum, which is 2^63 - 1).

View File

@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
Note that these functions work slowly.
Note that these functions worked slowly in ClickHouse versions before 21.1.
## encrypt {#encrypt}
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@ -52,57 +52,38 @@ Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
`comment` String,
`secret` String
)
ENGINE = Memory
```
Insert this data:
Insert some data. Please avoid storing the keys/IVs in the database, as this undermines the whole concept of encryption; storing 'hints' is unsafe too and is done here only for illustrative purposes:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Example with `-gcm`:
@ -110,40 +91,26 @@ Example with `-gcm`:
Query:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Example with `-gcm` mode and with `aad`:
Query:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
Supported encryption modes:
@ -156,7 +123,7 @@ Supported encryption modes:
**Syntax**
```sql
``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. Only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Insert this data:
Result:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
┌─ciphertexts_equal─┐
│ 1 │
└───────────────────┘
```
Example without `iv`:
But `encrypt` fails when `key` or `iv` is longer than expected:
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Result:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
Example with `iv`:
While `aes_encrypt_mysql` produces MySQL-compatible output:
Query:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Result:
```text
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Notice how supplying an even longer `IV` produces the same result:
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Result:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Which is binary equal to what MySQL produces on same inputs:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
## decrypt {#decrypt}
This function decrypts data using these modes:
This function decrypts ciphertext into a plaintext using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
@ -247,7 +234,7 @@ This function decrypts data using these modes:
**Syntax**
```sql
``` sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
**Examples**
Create this table:
Re-using table from [encrypt](./encryption-functions.md#encrypt).
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
``` text
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Now let's try to decrypt all that data.
Query:
``` sql
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
```
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ <20>4<EFBFBD>
<20>
│ aes-256-cfb128 with IV │ <20><><EFBFBD>6<EFBFBD>~ │
│aes-256-cbc no IV │ <20>2*4<>h3c<33>4w<34><77>@
└─────────────────────────────────────┴───────────┘
```
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
## aes_decrypt_mysql {#aes_decrypt_mysql}
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
Supported decryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
@ -321,7 +313,7 @@ Supported decryption modes:
**Syntax**
```sql
``` sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
**Examples**
Create this table:
Query:
Let's decrypt data we've previously encrypted with MySQL:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
```
Result:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
┌─plaintext─┐
│ Secret │
└───────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@ -0,0 +1,44 @@
---
toc_priority: 6
toc_title: EmbeddedRocksDB
---
# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = EmbeddedRocksDB
PRIMARY KEY(primary_key_name);
```
Обязательные параметры:
- `primary_key_name` может быть любое имя столбца из списка столбцов.
- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`.
- Поддерживается только один столбец в первичном ключе.
- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rocksdb` в соответствующем порядке.
- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
Пример:
``` sql
CREATE TABLE test
(
`key` String,
`v1` UInt32,
`v2` String,
`v3` Float32,
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key;
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) <!--hide-->

View File

@ -406,21 +406,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
Возможные значения:
- `'best_effort'` — включает расширенный парсинг.
- `best_effort` — включает расширенный парсинг.
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.
- `'basic'` — используется базовый парсер.
- `basic` — используется базовый парсер.
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.
Значение по умолчанию: `'basic'`.
Значение по умолчанию: `basic`.
См. также:
- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
## date_time_output_format {#settings-date_time_output_format}
Позволяет выбрать разные выходные форматы текстового представления даты и времени.
Возможные значения:
- `simple` - простой выходной формат.
Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
- `iso` - выходной формат ISO.
Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
- `unix_timestamp` - выходной формат Unix.
Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`.
Значение по умолчанию: `simple`.
См. также:
- [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
## join_default_strictness {#settings-join_default_strictness}
Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).

View File

@ -27,7 +27,7 @@ DateTime([timezone])
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime).
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format).

View File

@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR
### 列 TTL {#mergetree-column-ttl}
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期则ClickHouse 会从文件系统中的数据片段中此列。
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期则ClickHouse 会从文件系统中的数据片段中删除此列。
`TTL`子句不能被用于主键字段。

View File

@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
add_object_library(clickhouse_processors_merges Processors/Merges)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
set (DBMS_COMMON_LIBRARIES)
# libgcc_s does not provide an implementation of an atomics library. Instead,

View File

@ -0,0 +1,2 @@
add_executable(test-connect test_connect.cpp)
target_link_libraries (test-connect PRIVATE dbms)

View File

@ -0,0 +1,99 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <atomic>
#include <Poco/Net/StreamSocket.h>
#include <Common/Exception.h>
#include <Common/Stopwatch.h>
#include <IO/ReadHelpers.h>
/** In a loop it connects to the server and immediately breaks the connection.
  * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
  * Long time ago this behavior caused a bug in the TCPServer implementation in the Poco library.
  *
  * Usage: test-connect [num_iterations [num_threads [host [port]]]]
  * Defaults: 1 iteration, 1 thread, host "localhost", port 9000.
  *
  * Every thread performs up to num_iterations connect/close cycles.
  * If a single cycle takes longer than 0.1 seconds, the elapsed time is printed
  * and all threads are asked to stop.
  */
int main(int argc, char ** argv)
try
{
    size_t num_iterations = 1;
    size_t num_threads = 1;
    std::string host = "localhost";
    uint16_t port = 9000;

    if (argc >= 2)
        num_iterations = DB::parse<size_t>(argv[1]);

    if (argc >= 3)
        num_threads = DB::parse<size_t>(argv[2]);

    if (argc >= 4)
        host = argv[3];

    if (argc >= 5)
        port = DB::parse<uint16_t>(argv[4]);

    /// Set by the first thread that observes a slow connect; the other threads poll it.
    std::atomic_bool cancel{false};

    std::vector<std::thread> threads(num_threads);
    for (auto & thread : threads)
    {
        thread = std::thread([&]
        {
            for (size_t i = 0; i < num_iterations && !cancel.load(std::memory_order_relaxed); ++i)
            {
                std::cerr << ".";

                /// Negative value means "no descriptor is open", so the handler below knows whether to close it.
                int fd = -1;

                /// All the setup is done inside the try block: an exception that leaves
                /// the thread function would terminate the whole process.
                try
                {
                    Poco::Net::SocketAddress address(host, port);

                    fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
                    if (fd < 0)
                        DB::throwFromErrno("Cannot create socket", 0);

                    /// Linger enabled with zero timeout: close() resets the connection instead of the normal shutdown.
                    linger linger_value;
                    linger_value.l_onoff = 1;
                    linger_value.l_linger = 0;

                    if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
                        DB::throwFromErrno("Cannot set linger", 0);

                    Stopwatch watch;

                    /// Throw before closing the socket, so that close() cannot overwrite errno
                    /// before it is put into the exception message. The descriptor is closed in the handler.
                    int res = connect(fd, address.addr(), address.length());
                    if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
                        DB::throwFromErrno("Cannot connect", 0);

                    close(fd);
                    fd = -1;

                    const double elapsed_seconds = watch.elapsedSeconds();
                    if (elapsed_seconds > 0.1)
                    {
                        std::cerr << elapsed_seconds << "\n";
                        cancel = true;
                        break;
                    }
                }
                catch (const Poco::Exception & e)
                {
                    /// Do not leak the descriptor if any step after socket() has failed.
                    if (fd >= 0)
                        close(fd);

                    std::cerr << e.displayText() << "\n";
                }
            }
        });
    }

    for (auto & thread : threads)
        thread.join();

    std::cerr << "\n";
}
catch (const Poco::Exception & e)
{
    std::cerr << e.displayText() << "\n";
}

View File

@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{

View File

@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{

View File

@ -17,13 +17,17 @@ namespace DB
static UInt64 toBits64(const Int8 * bytes64)
{
static const __m128i zero16 = _mm_setzero_si128();
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
<< 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
<< 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
<< 48);
UInt64 res =
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
return ~res;
}
#endif
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
#endif
for (; pos < end; ++pos)
count += *pos > 0;
count += *pos != 0;
return count;
}
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
#endif
for (; pos < end; ++pos)
count += (*pos & ~*pos2) > 0;
count += (*pos & ~*pos2) != 0;
return count;
}
@ -232,9 +236,10 @@ namespace
while (filt_pos < filt_end_aligned)
{
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = ~mask;
if (mask == 0)
{

View File

@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
for (size_t offset = min_size; offset < max_size; offset += 16)
{
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
zero16));
mask = ~mask;
if (mask)
{

View File

@ -163,6 +163,7 @@ private:
friend class ActionsDAG;
};
using BlockPtr = std::shared_ptr<Block>;
using Blocks = std::vector<Block>;
using BlocksList = std::list<Block>;
using BlocksPtr = std::shared_ptr<Blocks>;

View File

@ -6,8 +6,10 @@
#include <Core/MySQL/PacketsProtocolText.h>
#include <Core/MySQL/PacketsReplication.h>
#include <Core/MySQL/MySQLReplication.h>
#include <Common/DNSResolver.h>
#include <Poco/String.h>
namespace DB
{
using namespace Generic;

View File

@ -7,7 +7,6 @@
#include <IO/WriteHelpers.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/StreamSocket.h>
#include <Common/DNSResolver.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Core/MySQL/IMySQLWritePacket.h>

View File

@ -13,11 +13,13 @@
#include <IO/WriteBufferFromOStream.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/map.h>
#include <ext/chrono_io.h>
#include <Common/setThreadName.h>
#include "CacheDictionary.inc.h"
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
namespace ProfileEvents
{
@ -130,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
}
@ -249,34 +251,384 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
}
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr CacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
auto keys_size = keys.size();
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto null_value = StringRef{std::get<String>(attribute.null_value)};
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
const auto & null_value = std::get<AttributeType>(attribute.null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
getItemsString(attribute, keys, column.get(), default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void CacheDictionary::getItemsNumberImpl(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const
{
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
/// First fill everything with default values
const auto rows = ext::size(ids);
for (const auto row : ext::range(0, rows))
out[row] = default_value_extractor[row];
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
/// Maybe there are duplicate keys, so we remember their indices.
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
size_t cache_hit = 0;
size_t cache_not_found_count = 0;
size_t cache_expired_cound = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
auto insert_to_answer_routine = [&](size_t row, size_t idx)
{
auto & cell = cells[idx];
if (!cell.isDefault())
out[row] = static_cast<OutputType>(attribute_array[idx]);
};
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
/** cell should be updated if either:
* 1. ids do not match,
* 2. cell has expired,
* 3. explicit defaults were specified and cell was set default. */
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
++cache_hit;
insert_to_answer_routine(row, cell_idx);
}
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
{
++cache_not_found_count;
cache_expired_or_not_found_ids[id].push_back(row);
}
else if (state == ResultState::FoundButExpired)
{
cache_expired_cound++;
cache_expired_or_not_found_ids[id].push_back(row);
if (allow_read_expired_keys)
insert_to_answer_routine(row, cell_idx);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
if (!cache_not_found_count)
{
/// Nothing to update - return
if (!cache_expired_cound)
return;
/// Update asynchronously only if allow_read_expired_keys is enabled.
if (allow_read_expired_keys)
{
std::vector<Key> required_expired_ids;
required_expired_ids.reserve(cache_expired_cound);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
/// request new values
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Nothing to do - return
return;
}
}
/// From this point we have to update all keys sync.
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
/// and there no cache_not_found_ids but some cache_expired.
std::vector<Key> required_ids;
required_ids.reserve(cache_not_found_count + cache_expired_cound);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
/// Request new values
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
waitForCurrentUpdateFinish(update_unit_ptr);
/// Add updated keys to answer.
const size_t attribute_index = getAttributeIndex(attribute.name);
for (auto & [key, value] : update_unit_ptr->found_ids)
{
if (value.found)
{
for (const size_t row : cache_expired_or_not_found_ids[key])
out[row] = std::get<OutputType>(value.values[attribute_index]);
}
}
}
void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
void CacheDictionary::getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto rows = ext::size(ids);
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
/// Save on some allocations.
out->getOffsets().reserve(rows);
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
auto found_outdated_values = false;
/// Perform optimistic version, fallback to pessimistic if failed.
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// Fetch up-to-date values, discard on fail.
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
out->insertData(string_ref.data, string_ref.size);
}
else
{
found_outdated_values = true;
break;
}
}
}
/// Optimistic code completed successfully.
if (!found_outdated_values)
{
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows, std::memory_order_release);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
return;
}
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
out->getChars().resize_assume_reserved(0);
out->getOffsets().resize_assume_reserved(0);
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
/// we are going to store every string separately
std::unordered_map<Key, String> local_cache;
size_t cache_not_found_count = 0;
size_t cache_expired_count = 0;
size_t total_length = 0;
size_t cache_hit = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
{
const auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
/// Do not store default, but count it in total length.
if (!cell.isDefault())
local_cache[id] = String{string_ref};
total_length += string_ref.size + 1;
};
for (const auto row : ext::range(0, ids.size()))
{
const auto id = ids[row];
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
++cache_hit;
insert_value_routine(row, id, cell_idx);
}
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
{
++cache_not_found_count;
cache_expired_or_not_found_ids[id].push_back(row);
}
else if (state == ResultState::FoundButExpired)
{
++cache_expired_count;
cache_expired_or_not_found_ids[id].push_back(row);
if (allow_read_expired_keys)
insert_value_routine(row, id, cell_idx);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
/// Async update of expired keys.
if (!cache_not_found_count)
{
if (allow_read_expired_keys && cache_expired_count)
{
std::vector<Key> required_expired_ids;
required_expired_ids.reserve(cache_expired_count);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Insert all found keys and defaults to output array.
out->getChars().reserve(total_length);
for (const auto row : ext::range(0, ext::size(ids)))
{
const auto id = ids[row];
StringRef value;
/// Previously we stored found keys in map.
const auto it = local_cache.find(id);
if (it != local_cache.end())
value = StringRef(it->second);
else
value = default_value_extractor[row];
out->insertData(value.data, value.size);
}
/// Nothing to do else.
return;
}
}
/// We will request both cache_not_found_ids and cache_expired_ids sync.
std::vector<Key> required_ids;
required_ids.reserve(cache_not_found_count + cache_expired_count);
std::transform(
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
waitForCurrentUpdateFinish(update_unit_ptr);
const size_t attribute_index = getAttributeIndex(attribute.name);
/// Only calculate the total length.
for (auto & [key, value] : update_unit_ptr->found_ids)
{
if (value.found)
{
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
}
else
{
for (const auto row : cache_expired_or_not_found_ids[key])
total_length += default_value_extractor[row].size + 1;
}
}
out->getChars().reserve(total_length);
for (const auto row : ext::range(0, ext::size(ids)))
{
const auto id = ids[row];
StringRef value;
/// We have two maps: found in cache and found in source.
const auto local_it = local_cache.find(id);
if (local_it != local_cache.end())
value = StringRef(local_it->second);
else
{
const auto found_it = update_unit_ptr->found_ids.find(id);
/// Previously we didn't store defaults in local cache.
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
value = std::get<String>(found_it->second.values[attribute_index]);
else
value = default_value_extractor[row];
}
out->insertData(value.data, value.size);
}
}
/// Merges several callables into a single object whose operator() is the
/// overload set of all of them.
template <class... Callables>
struct Overloaded : Callables...
{
    using Callables::operator()...;
};
@ -375,8 +727,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
return oldest_id;
}
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
/// There are three types of ids.
/// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
/// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
@ -444,7 +802,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
{
/// Nothing to update - return;
if (!cache_expired_count)
return;
return result;
if (allow_read_expired_keys)
{
@ -458,7 +816,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Update is async - no need to wait.
return;
return result;
}
}
@ -483,6 +841,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
for (const auto row : cache_expired_or_not_found_ids[key])
out[row] = true;
}
return result;
}
@ -707,7 +1067,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
}

View File

@ -21,6 +21,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace CurrentMetrics
{
@ -119,77 +120,20 @@ public:
std::exception_ptr getLastException() const override;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -260,12 +204,18 @@ private:
/* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
void getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
PaddedPODArray<Key> getCachedIds() const;
@ -456,5 +406,6 @@ private:
mutable std::condition_variable is_update_finished;
std::atomic<bool> finished{false};
};
};
}

View File

@ -1,368 +0,0 @@
#pragma once
#include <stdexcept>
#include "CacheDictionary.h"
#include <Columns/ColumnsNumber.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/typeid_cast.h>
#include <DataStreams/IBlockInputStream.h>
#include <ext/chrono_io.h>
#include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
namespace ProfileEvents
{
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
}
namespace CurrentMetrics
{
extern const Metric DictCacheRequests;
}
namespace DB
{
namespace ErrorCodes
{
}
/** Writes the value of a numeric attribute into `out[row]` for every key `ids[row]`.
  *
  * Every row is first set to get_default(row). Rows whose key has a usable cache cell
  * that is not marked as default are then overwritten with the cached value.
  * Keys that are missing or expired are pushed to the update queue:
  *  - if there are only expired keys and allow_read_expired_keys is set, the update is
  *    not waited for and the (possibly stale) cached values already written are returned;
  *  - otherwise the update is waited for and the fetched values are written to all rows
  *    holding the corresponding key.
  */
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumberImpl(
    Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
{
    const auto rows = ext::size(ids);

    /// Start from defaults; rows with real values are overwritten below.
    for (const auto row : ext::range(0, rows))
        out[row] = get_default(row);

    /// Key -> every row that holds it (keys may repeat), for keys that need an update.
    std::unordered_map<Key, std::vector<size_t>> rows_by_stale_id;

    auto & cached_values = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);

    size_t hits = 0;
    size_t not_found = 0;
    size_t expired = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        /// Copies the cached value into the answer unless the cell is marked as default.
        auto copy_cached_value = [&](size_t row, size_t idx)
        {
            if (!cells[idx].isDefault())
                out[row] = static_cast<OutputType>(cached_values[idx]);
        };

        /// Take what is usable from the cache and remember which keys require an update.
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                ++hits;
                copy_cached_value(row, cell_idx);
                continue;
            }

            if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
            {
                ++not_found;
                rows_by_stale_id[id].push_back(row);
                continue;
            }

            if (state == ResultState::FoundButExpired)
            {
                ++expired;
                rows_by_stale_id[id].push_back(row);

                /// A stale value is acceptable only when reading expired keys is allowed.
                if (allow_read_expired_keys)
                    copy_cached_value(row, cell_idx);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, hits);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - not_found - expired, std::memory_order_release);

    const bool only_expired = (not_found == 0);

    /// Everything was served from the cache - nothing to update.
    if (only_expired && expired == 0)
        return;

    /// The update may be left running in the background only when no key is missing
    /// and reading expired keys is allowed; otherwise we have to wait for it.
    const bool update_async = only_expired && allow_read_expired_keys;

    std::vector<Key> ids_to_update;
    ids_to_update.reserve(not_found + expired);
    for (const auto & entry : rows_by_stale_id)
        ids_to_update.push_back(entry.first);

    /// Request new values.
    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(ids_to_update));
    tryPushToUpdateQueueOrThrow(update_unit_ptr);

    if (update_async)
        return;

    waitForCurrentUpdateFinish(update_unit_ptr);

    /// Put the freshly fetched values into every row that asked for the key.
    const size_t attribute_index = getAttributeIndex(attribute.name);
    for (auto & [key, value] : update_unit_ptr->found_ids)
    {
        if (!value.found)
            continue;

        for (const size_t row : rows_by_stale_id[key])
            out[row] = std::get<OutputType>(value.values[attribute_index]);
    }
}
/** Appends to `out`, in row order, the value of a string attribute for every key in `ids`.
  *
  * Works in two stages:
  *  1. Optimistic: under the read lock, insert values straight into `out` as long as every
  *     key is found and valid. Cells marked as default yield get_default(row).
  *  2. Pessimistic (entered as soon as one key is missing or expired): the partial output
  *     is discarded, usable cached strings are copied into a local map, and missing/expired
  *     keys are pushed to the update queue. If there are only expired keys and
  *     allow_read_expired_keys is set, the update is not waited for and the answer is built
  *     from the local map and defaults. Otherwise the update is waited for and the answer is
  *     built from the local map, then the fetched values, then defaults.
  *
  * Note: a value found in the local map takes precedence over a freshly fetched one.
  */
template <typename DefaultGetter>
void CacheDictionary::getItemsString(
    Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
{
    const auto rows = ext::size(ids);

    /// Save on some allocations.
    out->getOffsets().reserve(rows);

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// Perform optimistic version, fallback to pessimistic if failed.
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        /// Fetch up-to-date values, discard on fail.
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
            else
            {
                found_outdated_values = true;
                break;
            }
        }
    }

    /// Optimistic code completed successfully.
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows, std::memory_order_relaxed);
        hit_count.fetch_add(rows, std::memory_order_release);
        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
        return;
    }

    /// Now onto the pessimistic one, discard possible partial results from the optimistic path.
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;

    /// We are going to store every string separately: the strings are copied so that
    /// they can be used after the read lock is released.
    std::unordered_map<Key, String> local_cache;

    size_t cache_not_found_count = 0;
    size_t cache_expired_count = 0;

    size_t total_length = 0;
    size_t cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
        {
            const auto & cell = cells[cell_idx];
            const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];

            /// Do not store default, but count it in total length.
            if (!cell.isDefault())
                local_cache[id] = String{string_ref};

            total_length += string_ref.size + 1;
        };

        for (const auto row : ext::range(0, ids.size()))
        {
            const auto id = ids[row];
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                ++cache_hit;
                insert_value_routine(row, id, cell_idx);
            }
            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
            {
                ++cache_not_found_count;
                cache_expired_or_not_found_ids[id].push_back(row);
            }
            else if (state == ResultState::FoundButExpired)
            {
                ++cache_expired_count;
                cache_expired_or_not_found_ids[id].push_back(row);

                /// A stale value is taken only when reading expired keys is allowed.
                if (allow_read_expired_keys)
                    insert_value_routine(row, id, cell_idx);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);

    /// Async update of expired keys.
    if (!cache_not_found_count)
    {
        if (allow_read_expired_keys && cache_expired_count)
        {
            std::vector<Key> required_expired_ids;
            required_expired_ids.reserve(cache_expired_count);
            std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
                           std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });

            auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));

            tryPushToUpdateQueueOrThrow(update_unit_ptr);

            /// Insert all found keys and defaults to output array.
            out->getChars().reserve(total_length);

            for (const auto row : ext::range(0, ext::size(ids)))
            {
                const auto id = ids[row];
                StringRef value;

                /// Previously we stored found keys in map.
                const auto it = local_cache.find(id);
                if (it != local_cache.end())
                    value = StringRef(it->second);
                else
                    value = get_default(row);

                out->insertData(value.data, value.size);
            }

            /// Nothing to do else.
            return;
        }
    }

    /// We will request both cache_not_found_ids and cache_expired_ids sync.
    std::vector<Key> required_ids;
    required_ids.reserve(cache_not_found_count + cache_expired_count);
    std::transform(
        std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
        std::back_inserter(required_ids), [](auto & pair) { return pair.first; });

    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));

    tryPushToUpdateQueueOrThrow(update_unit_ptr);
    waitForCurrentUpdateFinish(update_unit_ptr);

    const size_t attribute_index = getAttributeIndex(attribute.name);

    /// Only calculate the total length.
    for (auto & [key, value] : update_unit_ptr->found_ids)
    {
        if (value.found)
        {
            /// Bind by reference: only the size is needed, so the string must not be copied.
            const auto & found_value_ref = std::get<String>(value.values[attribute_index]);
            total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
        }
        else
        {
            for (const auto row : cache_expired_or_not_found_ids[key])
                total_length += get_default(row).size + 1;
        }
    }

    out->getChars().reserve(total_length);

    for (const auto row : ext::range(0, ext::size(ids)))
    {
        const auto id = ids[row];
        StringRef value;

        /// We have two maps: found in cache and found in source.
        const auto local_it = local_cache.find(id);
        if (local_it != local_cache.end())
            value = StringRef(local_it->second);
        else
        {
            const auto found_it = update_unit_ptr->found_ids.find(id);

            /// Previously we didn't store defaults in local cache.
            if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
                value = std::get<String>(found_it->second.values[attribute_index]);
            else
                value = get_default(row);
        }

        out->insertData(value.data, value.size);
    }
}
}

View File

@ -1,32 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates CacheDictionary::get<TYPE>(attribute_name, ids, out) for one attribute type.
/// The generated method looks the attribute up by name, checks that its underlying type
/// is ut<TYPE>, and calls getItemsNumberImpl with a default getter that returns the
/// attribute's own null_value for every row.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(attribute.null_value); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
}
/// One getter per integer, floating-point and decimal type listed below.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The helper macro is not needed after the getters are generated.
#undef DEFINE
}

View File

@ -1,34 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates the definition of CacheDictionary::get<TYPE>() with a per-row default column
/// for one numeric attribute type.
/// Each generated method checks that the named attribute has underlying type ut<TYPE> and delegates to
/// getItemsNumberImpl, passing a getter that returns def[row] for the requested row.
/// NOTE(review): `def` is indexed with the same row index as `ids`, so it is presumably expected
/// to have at least as many elements as `ids` - confirm against callers.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
}
/// One instantiation per supported numeric / decimal attribute type.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The macro is local to this translation unit.
#undef DEFINE
}

View File

@ -1,31 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates the definition of CacheDictionary::get<TYPE>() with a single constant default
/// for one numeric attribute type.
/// Each generated method checks that the named attribute has underlying type ut<TYPE> and delegates to
/// getItemsNumberImpl, passing a getter that returns the same `def` value for any row index.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
}
/// One instantiation per supported numeric / decimal attribute type.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The macro is local to this translation unit.
#undef DEFINE
}

View File

@ -10,7 +10,8 @@
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace ProfileEvents
{
@ -70,48 +71,50 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
createAttributes();
}
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
}
void ComplexKeyCacheDictionary::getString(
ColumnPtr ComplexKeyCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
}
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
auto keys_size = key_columns.front()->size();
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(attribute.null_values);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsString(attribute, key_columns, out, default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
@ -158,15 +161,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
return {oldest_id, false, false};
}
void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
const auto rows_num = key_columns.front()->size();
auto result = ColumnUInt8::create(rows_num);
auto& out = result->getData();
for (const auto row : ext::range(0, rows_num))
out[row] = false;
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
MapType<std::vector<size_t>> outdated_keys;
const auto rows_num = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
Arena temporary_keys_pool;
@ -212,7 +221,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
if (outdated_keys.empty())
return;
return result;
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
@ -233,8 +242,395 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
for (const auto out_idx : outdated_keys[key])
out[out_idx] = false;
});
return result;
}
/** Fills `out[row]` with the value of a numeric attribute for every row of `key_columns`.
  *
  * First pass (under the read lock): every row whose cell is valid is answered from the cache;
  * a valid cell that is marked as default yields `default_value_extractor[row]` instead of the stored value.
  * Rows whose cell is not valid are grouped by key in `outdated_keys`.
  *
  * Second pass: `update()` is asked to load one representative row per distinct outdated key;
  * its callbacks then fill every row that shares that key, either with the freshly stored value
  * or with `default_value_extractor[row]` when the key was not found.
  *
  * `out` is indexed by row, so it must already have one element per row of `key_columns`.
  */
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void ComplexKeyCacheDictionary::getItemsNumberImpl(
    Attribute & attribute,
    const Columns & key_columns,
    PaddedPODArray<OutputType> & out,
    DefaultValueExtractor & default_value_extractor) const
{
    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;

    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);

    const auto rows_num = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;
    PODArray<StringRef> keys_array(rows_num);

    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            /** cell should be updated if either:
              * 1. keys (or hash) do not match,
              * 2. cell has expired,
              * 3. explicit defaults were specified and cell was set default. */
            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hits per row, in the same unit as query_count.
    /// `outdated_keys.size()` is the number of distinct outdated keys, so subtracting it from `rows_num`
    /// would over-count hits whenever several rows share one outdated key.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    if (outdated_keys.empty())
        return;

    /// One representative row per distinct outdated key is enough to request it from the source.
    std::vector<size_t> required_rows(outdated_keys.size());
    std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
    {
        return pair.getMapped().front();
    });

    /// request new values
    update(
        key_columns,
        keys_array,
        required_rows,
        [&](const StringRef key, const size_t cell_idx)
        {
            for (const auto row : outdated_keys[key])
                out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
        },
        [&](const StringRef key, const size_t)
        {
            for (const auto row : outdated_keys[key])
                out[row] = default_value_extractor[row];
        });
}
/** Appends the value of a String attribute to `out` for every row of `key_columns`, in row order.
  *
  * Optimistic pass (under the read lock): while every cell is valid, values are appended straight from the cache
  * (or from `default_value_extractor[row]` for cells marked as default). If all rows succeed, the function returns.
  *
  * Pessimistic pass: on the first cell that is not valid the partial output is discarded and all rows are scanned again.
  * Cached non-default strings are copied into `temporary_keys_pool` and remembered in `map`;
  * rows with cells that are not valid are grouped by key in `outdated_keys` and requested through `update()`.
  * Finally the rows are emitted in order: the value from `map` if the key is there,
  * otherwise `default_value_extractor[row]`.
  */
void ComplexKeyCacheDictionary::getItemsString(
    Attribute & attribute,
    const Columns & key_columns,
    ColumnString * out,
    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
    const auto rows_num = key_columns.front()->size();
    /// save on some allocations
    out->getOffsets().reserve(rows_num);

    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// perform optimistic version, fallback to pessimistic if failed
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, discard on fail
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            /// The key is only needed for this lookup, so its bytes are given back to the pool right away.
            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                found_outdated_values = true;
                break;
            }
            else
            {
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
        }
    }

    /// optimistic code completed successfully
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows_num, std::memory_order_relaxed);
        hit_count.fetch_add(rows_num, std::memory_order_release);
        return;
    }

    /// now onto the pessimistic one, discard possible partial results from the optimistic path
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;
    /// we are going to store every string separately
    MapType<StringRef> map;
    PODArray<StringRef> keys_array(rows_num);

    size_t total_length = 0;
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];

                /// Default cells are not put into `map`: the final loop falls back to the extractor for them.
                if (!cell.isDefault())
                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);

                total_length += string_ref.size + 1;
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hits per row, in the same unit as query_count.
    /// `outdated_keys.size()` is the number of distinct outdated keys, so subtracting it from `rows_num`
    /// would over-count hits whenever several rows share one outdated key.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    /// request new values
    if (!outdated_keys.empty())
    {
        /// One representative row per distinct outdated key is enough to request it from the source.
        std::vector<size_t> required_rows(outdated_keys.size());
        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
        {
            return pair.getMapped().front();
        });

        update(
            key_columns,
            keys_array,
            required_rows,
            [&](const StringRef key, const size_t cell_idx)
            {
                const StringRef attribute_value = attribute_array[cell_idx];

                /// We must copy key and value to own memory, because it may be replaced with another
                ///  in next iterations of inner loop of update.
                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);

                map[copied_key] = copied_value;
                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
            },
            [&](const StringRef key, const size_t)
            {
                for (const auto row : outdated_keys[key])
                    total_length += default_value_extractor[row].size + 1;
            });
    }

    /// total_length accumulates size + 1 per output row, so one reservation covers the loop below.
    out->getChars().reserve(total_length);

    for (const auto row : ext::range(0, ext::size(keys_array)))
    {
        const StringRef key = keys_array[row];
        auto * const it = map.find(key);
        const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
        out->insertData(string_ref.data, string_ref.size);
    }
}
/// Loads the keys of the rows listed in `in_requested_rows` from the source and stores the result in the cache cells.
///
/// in_key_columns     - key columns passed on to source_ptr->loadKeys().
/// in_keys            - per-row serialized keys; only the entries indexed by `in_requested_rows` are read here.
/// in_requested_rows  - row numbers whose keys must be (re)loaded.
/// on_cell_updated    - called as on_cell_updated(key, cell_idx) for every row returned by the source,
///                      after the cell's attributes, key, hash and expiration time have been written.
/// on_key_not_found   - called as on_key_not_found(key, cell_idx) for every requested key the source did not return,
///                      after the cell has been filled with the attributes' null values and marked as default.
///
/// The write lock object is created before the source is read and lives until the function returns.
template <typename PresentKeyHandler, typename AbsentKeyHandler>
void ComplexKeyCacheDictionary::update(
const Columns & in_key_columns,
const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated,
AbsentKeyHandler && on_key_not_found) const
{
/// requested key -> "was returned by the source" flag, initially false for every requested key
MapType<bool> remaining_keys{in_requested_rows.size()};
for (const auto row : in_requested_rows)
remaining_keys.insert({in_keys[row], false});
/// Cell lifetime in seconds is drawn from [min_sec, max_sec] (see setExpiresAt calls below).
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
{
/// Measures the time spent reading from the source; reported as DictCacheRequestTimeNs below.
Stopwatch watch;
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
stream->readPrefix();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
const auto attributes_size = attributes.size();
const auto now = std::chrono::system_clock::now();
while (const auto block = stream->read())
{
/// cache column pointers
/// The first `keys_size` columns of the block are read as key columns,
/// the following `attributes_size` columns as attribute columns.
const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows();
for (const auto row : ext::range(0, rows_num))
{
/// Build the key for this row and locate the cell it should occupy.
auto key = allocKey(row, key_columns, keys);
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Store the value of every attribute for this row into the cell.
for (const auto attribute_idx : ext::range(0, attributes.size()))
{
const auto & attribute_column = *attribute_columns[attribute_idx];
auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, cell_idx, attribute_column[row]);
}
/// if the cell key is empty and this is not the zero cell, then the cell was unused: count a new element
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// handle memory allocated for old key
if (key == cell.key)
{
/// The cell already holds an equal key: release the freshly allocated one and keep using the cell's.
freeKey(key);
key = cell.key;
}
else
{
/// new key is different from the old one
if (cell.key.data)
freeKey(cell.key);
cell.key = key;
}
cell.hash = hash;
/// A random lifetime is used only when both bounds are non-zero; otherwise the cell never expires.
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// inform caller
on_cell_updated(key, cell_idx);
/// mark corresponding id as found
remaining_keys[key] = true;
}
}
stream->readSuffix();
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
}
size_t found_num = 0;
size_t not_found_num = 0;
const auto now = std::chrono::system_clock::now();
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.getMapped())
{
++found_num;
continue;
}
++not_found_num;
auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Set null_value for each attribute
for (auto & attribute : attributes)
setDefaultAttributeValue(attribute, cell_idx);
/// Check if cell had not been occupied before and increment element counter if it hadn't
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// Reuse the cell's key when it is equal; otherwise replace it with a copy owned by the cache.
if (key == cell.key)
key = cell.key;
else
{
if (cell.key.data)
freeKey(cell.key);
/// copy key from temporary pool
key = copyKey(key);
cell.key = key;
}
cell.hash = hash;
/// Same expiration rule as for found keys above.
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// Remember that this cell holds defaults rather than a value loaded from the source.
cell.setDefault();
/// inform caller that the cell has not been found
on_key_not_found(key, cell_idx);
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
}
void ComplexKeyCacheDictionary::createAttributes()
{
const auto attributes_size = dict_struct.attributes.size();
@ -263,6 +659,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
return attributes[it->second];
}
/// Resets the slot `idx` of `attribute` to the attribute's null value.
/// For String attributes the slot becomes a view over the stored null value itself (no copy is made);
/// a previously held string that does not already point at the null value is returned to `string_arena`.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
    auto reset_slot = [&](const auto & type_tag)
    {
        using ValueType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<ValueType, String>)
        {
            auto & storage = std::get<ContainerPtrType<ValueType>>(attribute.arrays);
            storage[idx] = std::get<ValueType>(attribute.null_values);
        }
        else
        {
            const auto & default_string = std::get<String>(attribute.null_values);
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already a view over the null value: nothing to free, nothing to change.
            if (slot.data == default_string.data())
                return;

            if (slot.data)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            slot = StringRef{default_string};
        }
    };

    callOnDictionaryAttributeType(attribute.type, reset_slot);
}
/// Creates the storage for one attribute of the given underlying type.
/// The null value is taken from `null_value`, a container of `size` elements is allocated for the values
/// and `bytes_allocated` is increased accordingly.
/// String attributes store StringRef elements and lazily create the shared `string_arena`.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    auto init_storage = [&](const auto & type_tag)
    {
        using ValueType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<ValueType, String>)
        {
            result.null_values = ValueType(null_value.get<NearestFieldType<ValueType>>()); /* NOLINT */
            result.arrays = std::make_unique<ContainerType<ValueType>>(size); /* NOLINT */
            bytes_allocated += size * sizeof(ValueType);
        }
        else
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// The arena is created on first use.
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
        }
    };

    callOnDictionaryAttributeType(type, init_storage);
    return result;
}
/// Stores `value` into the slot `idx` of `attribute`.
/// Numeric values are assigned directly. Strings are copied into `string_arena`:
/// the previous string is freed first (unless the slot was a view over the attribute's null value),
/// and an empty string is stored as an empty StringRef without any allocation.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
    auto store_value = [&](const auto & type_tag)
    {
        using ValueType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<ValueType, String>)
        {
            auto & storage = std::get<ContainerPtrType<ValueType>>(attribute.arrays);
            storage[idx] = value.get<NearestFieldType<ValueType>>();
        }
        else
        {
            const auto & new_string = value.get<String>();
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & default_string = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool owns_memory = slot.data && slot.data != default_string.data();
            if (owns_memory)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            const auto length = new_string.size();
            if (length == 0)
            {
                slot = {};
            }
            else
            {
                auto * buffer = string_arena->alloc(length);
                std::copy(new_string.data(), new_string.data() + length, buffer);
                slot = StringRef{buffer, length};
            }
        }
    };

    callOnDictionaryAttributeType(attribute.type, store_value);
}
StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
{
if (key_size_is_fixed)
@ -388,7 +880,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
keys.push_back(cells[idx].key);
}
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
}

View File

@ -23,7 +23,7 @@
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <DataStreams/IBlockInputStream.h>
#include "DictionaryHelpers.h"
namespace ProfileEvents
{
@ -89,93 +89,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// In all functions below, key_columns must be full (non-constant) columns.
/// See the requirement in IDataType.h for text-serialization functions.
/// Declares get<TYPE>() for one numeric attribute type: the overload that takes no explicit default
/// and writes the result into `out`.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
/// One declaration per supported numeric / decimal attribute type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
/// Declares get<TYPE>() for one numeric attribute type: the overload that takes an array of defaults (`def`)
/// in addition to the keys and writes the result into `out`.
/// NOTE(review): `def` presumably holds one default per row of `key_columns` - confirm against the definition.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
/// One declaration per supported numeric / decimal attribute type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
/// Declares get<TYPE>() for one numeric attribute type: the overload that takes a single default value (`def`)
/// in addition to the keys and writes the result into `out`.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
/// One declaration per supported numeric / decimal attribute type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -252,227 +175,18 @@ private:
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
{
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
MapType<std::vector<size_t>> outdated_keys;
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
Attribute & attribute,
const Columns & key_columns,
PaddedPODArray<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
const auto rows_num = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
Arena temporary_keys_pool;
PODArray<StringRef> keys_array(rows_num);
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
/** cell should be updated if either:
* 1. keys (or hash) do not match,
* 2. cell has expired,
* 3. explicit defaults were specified and cell was set default. */
if (!find_result.valid)
{
outdated_keys[key].push_back(row);
if (find_result.outdated)
++cache_expired;
else
++cache_not_found;
}
else
{
++cache_hit;
const auto & cell_idx = find_result.cell_idx;
const auto & cell = cells[cell_idx];
out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows_num, std::memory_order_relaxed);
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
if (outdated_keys.empty())
return;
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const size_t cell_idx)
{
for (const auto row : outdated_keys[key])
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
},
[&](const StringRef key, const size_t)
{
for (const auto row : outdated_keys[key])
out[row] = get_default(row);
});
}
template <typename DefaultGetter>
void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
{
const auto rows_num = key_columns.front()->size();
/// save on some allocations
out->getOffsets().reserve(rows_num);
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
Arena temporary_keys_pool;
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
auto found_outdated_values = false;
/// perform optimistic version, fallback to pessimistic if failed
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// fetch up-to-date values, discard on fail
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
const auto find_result = findCellIdx(key, now);
if (!find_result.valid)
{
found_outdated_values = true;
break;
}
else
{
const auto & cell_idx = find_result.cell_idx;
const auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
out->insertData(string_ref.data, string_ref.size);
}
}
}
/// optimistic code completed successfully
if (!found_outdated_values)
{
query_count.fetch_add(rows_num, std::memory_order_relaxed);
hit_count.fetch_add(rows_num, std::memory_order_release);
return;
}
/// now onto the pessimistic one, discard possible partial results from the optimistic path
out->getChars().resize_assume_reserved(0);
out->getOffsets().resize_assume_reserved(0);
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
MapType<std::vector<size_t>> outdated_keys;
/// we are going to store every string separately
MapType<StringRef> map;
PODArray<StringRef> keys_array(rows_num);
size_t total_length = 0;
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
if (!find_result.valid)
{
outdated_keys[key].push_back(row);
if (find_result.outdated)
++cache_expired;
else
++cache_not_found;
}
else
{
++cache_hit;
const auto & cell_idx = find_result.cell_idx;
const auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
if (!cell.isDefault())
map[key] = copyIntoArena(string_ref, temporary_keys_pool);
total_length += string_ref.size + 1;
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows_num, std::memory_order_relaxed);
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
/// request new values
if (!outdated_keys.empty())
{
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{
return pair.getMapped().front();
});
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const size_t cell_idx)
{
const StringRef attribute_value = attribute_array[cell_idx];
/// We must copy key and value to own memory, because it may be replaced with another
/// in next iterations of inner loop of update.
const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
map[copied_key] = copied_value;
total_length += (attribute_value.size + 1) * outdated_keys[key].size();
},
[&](const StringRef key, const size_t)
{
for (const auto row : outdated_keys[key])
total_length += get_default(row).size + 1;
});
}
out->getChars().reserve(total_length);
for (const auto row : ext::range(0, ext::size(keys_array)))
{
const StringRef key = keys_array[row];
const auto it = map.find(key);
const auto string_ref = it ? it->getMapped() : get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
}
void getItemsString(
Attribute & attribute,
const Columns & key_columns,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
template <typename PresentKeyHandler, typename AbsentKeyHandler>
void update(
@ -480,152 +194,7 @@ private:
const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated,
AbsentKeyHandler && on_key_not_found) const
{
MapType<bool> remaining_keys{in_requested_rows.size()};
for (const auto row : in_requested_rows)
remaining_keys.insert({in_keys[row], false});
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
{
Stopwatch watch;
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
stream->readPrefix();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
const auto attributes_size = attributes.size();
const auto now = std::chrono::system_clock::now();
while (const auto block = stream->read())
{
/// cache column pointers
const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows();
for (const auto row : ext::range(0, rows_num))
{
auto key = allocKey(row, key_columns, keys);
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
for (const auto attribute_idx : ext::range(0, attributes.size()))
{
const auto & attribute_column = *attribute_columns[attribute_idx];
auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, cell_idx, attribute_column[row]);
}
/// if cell id is zero and zero does not map to this cell, then the cell is unused
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// handle memory allocated for old key
if (key == cell.key)
{
freeKey(key);
key = cell.key;
}
else
{
/// new key is different from the old one
if (cell.key.data)
freeKey(cell.key);
cell.key = key;
}
cell.hash = hash;
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// inform caller
on_cell_updated(key, cell_idx);
/// mark corresponding id as found
remaining_keys[key] = true;
}
}
stream->readSuffix();
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
}
size_t found_num = 0;
size_t not_found_num = 0;
const auto now = std::chrono::system_clock::now();
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.getMapped())
{
++found_num;
continue;
}
++not_found_num;
auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Set null_value for each attribute
for (auto & attribute : attributes)
setDefaultAttributeValue(attribute, cell_idx);
/// Check if cell had not been occupied before and increment element counter if it hadn't
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
if (key == cell.key)
key = cell.key;
else
{
if (cell.key.data)
freeKey(cell.key);
/// copy key from temporary pool
key = copyKey(key);
cell.key = key;
}
cell.hash = hash;
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
cell.setDefault();
/// inform caller that the cell has not been found
on_key_not_found(key, cell_idx);
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
}
AbsentKeyHandler && on_key_not_found) const;
UInt64 getCellIdx(const StringRef key) const;

View File

@ -1,45 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Builds the storage descriptor for one attribute of the given underlying type.
/// The attribute's null value is extracted from `null_value`, a container of `size` elements of the
/// matching element type is allocated, and `bytes_allocated` grows by that container's footprint.
/// For string attributes `string_arena` is created as well if it does not exist yet.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

/// All fixed-size types are handled the same way; only TYPE differs between the cases.
#define INIT_FIXED_SIZE_ATTRIBUTE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
    { \
        result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
        result.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
        bytes_allocated += size * sizeof(TYPE); \
        break; \
    }

    switch (type)
    {
        INIT_FIXED_SIZE_ATTRIBUTE(UInt8)
        INIT_FIXED_SIZE_ATTRIBUTE(UInt16)
        INIT_FIXED_SIZE_ATTRIBUTE(UInt32)
        INIT_FIXED_SIZE_ATTRIBUTE(UInt64)
        INIT_FIXED_SIZE_ATTRIBUTE(UInt128)
        INIT_FIXED_SIZE_ATTRIBUTE(Int8)
        INIT_FIXED_SIZE_ATTRIBUTE(Int16)
        INIT_FIXED_SIZE_ATTRIBUTE(Int32)
        INIT_FIXED_SIZE_ATTRIBUTE(Int64)
        INIT_FIXED_SIZE_ATTRIBUTE(Decimal32)
        INIT_FIXED_SIZE_ATTRIBUTE(Decimal64)
        INIT_FIXED_SIZE_ATTRIBUTE(Decimal128)
        INIT_FIXED_SIZE_ATTRIBUTE(Float32)
        INIT_FIXED_SIZE_ATTRIBUTE(Float64)

        case AttributeUnderlyingType::utString:
        {
            /// Strings are kept as StringRef cells; the characters themselves live in `string_arena`.
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;
        }
    }

#undef INIT_FIXED_SIZE_ATTRIBUTE

    return result;
}
}

View File

@ -1,32 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(attribute.null_values); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,35 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,35 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,78 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Writes `value` into cell `idx` of the attribute's typed container.
/// Fixed-size types are read from the Field and assigned directly. Strings are copied into
/// `string_arena`, after releasing whatever arena memory the cell owned before.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
/// Reads the Field as FIELD_TYPE and stores it into the container of TYPE elements.
#define STORE_FROM_FIELD(TYPE, FIELD_TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
        break;

    switch (attribute.type)
    {
        STORE_FROM_FIELD(UInt8, UInt64)
        STORE_FROM_FIELD(UInt16, UInt64)
        STORE_FROM_FIELD(UInt32, UInt64)
        STORE_FROM_FIELD(UInt64, UInt64)
        STORE_FROM_FIELD(UInt128, UInt128)
        STORE_FROM_FIELD(Int8, Int64)
        STORE_FROM_FIELD(Int16, Int64)
        STORE_FROM_FIELD(Int32, Int64)
        STORE_FROM_FIELD(Int64, Int64)
        STORE_FROM_FIELD(Float32, Float64)
        STORE_FROM_FIELD(Float64, Float64)
        STORE_FROM_FIELD(Decimal32, Decimal32)
        STORE_FROM_FIELD(Decimal64, Decimal64)
        STORE_FROM_FIELD(Decimal128, Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & new_value = value.get<String>();
            auto & cell_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_value = std::get<String>(attribute.null_values);

            /// A cell that points at the attribute's null value does not own arena memory,
            /// so only free pointers that differ from it.
            const bool owns_arena_memory = cell_ref.data && cell_ref.data != null_value.data();
            if (owns_arena_memory)
                string_arena->free(const_cast<char *>(cell_ref.data), cell_ref.size);

            const auto length = new_value.size();
            if (length == 0)
            {
                /// Empty strings are stored as an empty StringRef without allocating.
                cell_ref = {};
                break;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_value.data(), new_value.data() + length, buffer);
            cell_ref = StringRef{buffer, length};
            break;
        }
    }

#undef STORE_FROM_FIELD
}
}

View File

@ -1,71 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Resets cell `idx` of the attribute's typed container to the attribute's null value.
/// For strings the cell is made to reference the null-value string itself (no copy), after
/// releasing any arena memory the cell owned before.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
/// Fixed-size types: plain assignment of the stored null value.
#define RESET_TO_NULL_VALUE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;

    switch (attribute.type)
    {
        RESET_TO_NULL_VALUE(UInt8)
        RESET_TO_NULL_VALUE(UInt16)
        RESET_TO_NULL_VALUE(UInt32)
        RESET_TO_NULL_VALUE(UInt64)
        RESET_TO_NULL_VALUE(UInt128)
        RESET_TO_NULL_VALUE(Int8)
        RESET_TO_NULL_VALUE(Int16)
        RESET_TO_NULL_VALUE(Int32)
        RESET_TO_NULL_VALUE(Int64)
        RESET_TO_NULL_VALUE(Float32)
        RESET_TO_NULL_VALUE(Float64)
        RESET_TO_NULL_VALUE(Decimal32)
        RESET_TO_NULL_VALUE(Decimal64)
        RESET_TO_NULL_VALUE(Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & null_value = std::get<String>(attribute.null_values);
            auto & cell_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already pointing at the null value: nothing to release, nothing to change.
            if (cell_ref.data == null_value.data())
                break;

            if (cell_ref.data)
                string_arena->free(const_cast<char *>(cell_ref.data), cell_ref.size);

            cell_ref = StringRef{null_value};
            break;
        }
    }

#undef RESET_TO_NULL_VALUE
}
}

View File

@ -3,6 +3,9 @@
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace DB
{
@ -31,194 +34,151 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
createAttributes();
}
/// Generates ComplexKeyDirectDictionary::get<TYPE>(attribute_name, key_columns, key_types, out)
/// for each fixed-size attribute type listed below. Every generated method validates the key types,
/// checks that the attribute's underlying type is TYPE, and calls getItemsImpl with a setter that
/// writes each value to out[row] and a default getter that returns the attribute's null value.
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
/// One instantiation per supported fixed-size type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
ColumnPtr ComplexKeyDirectDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return String(null_value.data, null_value.size); });
}
/// Generates ComplexKeyDirectDictionary::get<TYPE>(attribute_name, key_columns, key_types, def, out)
/// for each fixed-size attribute type listed below, where `def` is an array indexed by row.
/// Every generated method validates the key types, checks that the attribute's underlying type is
/// TYPE, and calls getItemsImpl with a setter that writes each value to out[row] and a default
/// getter that returns def[row].
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
/// One instantiation per supported fixed-size type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
}
auto keys_size = key_columns.front()->size();
/// Generates ComplexKeyDirectDictionary::get<TYPE>(attribute_name, key_columns, key_types, def, out)
/// for each fixed-size attribute type listed below, where `def` is a single value used for all rows.
/// Every generated method validates the key types, checks that the attribute's underlying type is
/// TYPE, and calls getItemsImpl with a setter that writes each value to out[row] and a default
/// getter that always returns `def`.
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
/// One instantiation per supported fixed-size type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return def; });
}
void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = attributes.front();
switch (attribute.type)
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, key_columns, out);
break;
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<String, String>(
attribute,
key_columns,
[&](const size_t row, const String value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
const auto ref = StringRef{value};
out->insertData(ref.data, ref.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
/// Returns a UInt8 column with one entry per row of `key_columns`: 1 if a row with the same
/// composite key was returned by `source_ptr->loadKeys()`, 0 otherwise.
/// `query_count` is increased by the number of requested rows.
ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
    dict_struct.validateKeyTypes(key_types);

    const auto rows = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();

    auto result = ColumnUInt8::create(rows);
    auto & out = result->getData();

    /// Serialize every requested composite key; the StringRefs stay valid for as long as
    /// `requested_keys_pool` lives, i.e. until the end of this function.
    Arena requested_keys_pool;
    StringRefs key_parts(keys_size);
    PODArray<StringRef> requested_keys(rows);
    std::vector<size_t> rows_to_load(rows);
    MapType<UInt8> found_by_key;

    for (size_t row = 0; row < rows; ++row)
    {
        const StringRef key = placeKeysInPool(row, key_columns, key_parts, *dict_struct.key, requested_keys_pool);
        requested_keys[row] = key;
        found_by_key[key] = 0;
        rows_to_load[row] = row;
    }

    auto stream = source_ptr->loadKeys(key_columns, rows_to_load);
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        /// The first `keys_size` columns of the block are the key columns.
        const auto block_key_columns = ext::map<Columns>(
            ext::range(0, keys_size), [&](const size_t position) { return block.safeGetByPosition(position).column; });

        Arena block_keys_pool;
        StringRefs block_key_parts(keys_size);
        const auto block_rows = block_key_columns.front()->size();

        for (size_t block_row = 0; block_row < block_rows; ++block_row)
        {
            const StringRef key = placeKeysInPool(block_row, block_key_columns, block_key_parts, *dict_struct.key, block_keys_pool);

            /// Keys that were not requested must not be inserted into the map.
            if (!found_by_key.has(key))
                continue;

            found_by_key[key] = 1;
        }
    }

    stream->readSuffix();

    for (size_t row = 0; row < rows; ++row)
        out[row] = found_by_key[requested_keys[row]];

    query_count.fetch_add(rows, std::memory_order_relaxed);

    return result;
}
void ComplexKeyDirectDictionary::createAttributes()
{
@ -229,7 +189,7 @@ void ComplexKeyDirectDictionary::createAttributes()
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attribute_name_by_index.emplace(attributes.size(), attribute.name);
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
if (attribute.hierarchical)
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@ -237,7 +197,6 @@ void ComplexKeyDirectDictionary::createAttributes()
}
}
template <typename T>
void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
@ -254,59 +213,19 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
}
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
{
Attribute attr{type, {}, {}, attr_name};
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
switch (type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
@ -356,14 +275,18 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void ComplexKeyDirectDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto rows = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys_array(keys_size);
MapType<OutputType> value_by_key;
HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
Arena temporary_keys_pool;
std::vector<size_t> to_load(rows);
PODArray<StringRef> keys(rows);
@ -372,8 +295,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
{
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
keys[row] = key;
value_by_key[key] = get_default(row);
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
to_load[row] = row;
value_is_null[key] = false;
}
auto stream = source_ptr->loadKeys(key_columns, to_load);
@ -392,6 +316,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
});
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
if (attribute.name != attribute_name_by_index.at(attribute_idx))
{
continue;
}
const IColumn & attribute_column = *attribute_columns[attribute_idx];
Arena pool;
@ -402,17 +331,15 @@ void ComplexKeyDirectDictionary::getItemsImpl(
for (const auto row_idx : ext::range(0, columns_size))
{
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
if (value_by_key.has(key))
{
if (attribute.type == AttributeUnderlyingType::utFloat32)
{
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
}
auto value = attribute_column[row_idx];
if (value.isNull())
value_is_null[key] = true;
else
{
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
}
value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
}
}
}
@ -422,78 +349,13 @@ void ComplexKeyDirectDictionary::getItemsImpl(
for (const auto row : ext::range(0, rows))
{
set_value(row, value_by_key[keys[row]]);
auto key = keys[row];
set_value(row, value_by_key[key], value_is_null[key]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void ComplexKeyDirectDictionary::getItemsStringImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys_array(keys_size);
MapType<String> value_by_key;
Arena temporary_keys_pool;
std::vector<size_t> to_load(rows);
PODArray<StringRef> keys(rows);
for (const auto row : ext::range(0, rows))
{
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
keys[row] = key;
value_by_key[key] = get_default(row);
to_load[row] = row;
}
auto stream = source_ptr->loadKeys(key_columns, to_load);
const auto attributes_size = attributes.size();
stream->readPrefix();
while (const auto block = stream->read())
{
const auto columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *attribute_columns[attribute_idx];
Arena pool;
StringRefs keys_temp(keys_size);
const auto columns_size = columns.front()->size();
for (const auto row_idx : ext::range(0, columns_size))
{
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
{
const String from_source = attribute_column[row_idx].template get<String>();
value_by_key[key] = from_source;
}
}
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
{
set_value(row, value_by_key[keys[row]]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
{
const auto it = attribute_index_by_name.find(attribute_name);
@ -503,65 +365,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
return attributes[it->second];
}
/// Fills `out[row]` with 1 if the source returned a row whose composite key equals the key at
/// `row` of `key_columns`, and with 0 otherwise. `query_count` is increased by the number of rows.
///
/// `attribute` only selects which attribute index triggers the scan of a block; no attribute
/// column is read. `T` is not used by the body.
template <typename T>
void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
{
    const auto rows = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();
    StringRefs keys_array(keys_size);
    MapType<UInt8> has_key;
    Arena temporary_keys_pool;
    std::vector<size_t> to_load(rows);
    PODArray<StringRef> keys(rows);

    for (const auto row : ext::range(0, rows))
    {
        const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
        keys[row] = key;
        has_key[key] = 0;
        to_load[row] = row;
    }

    auto stream = source_ptr->loadKeys(key_columns, to_load);

    stream->readPrefix();

    while (const auto block = stream->read())
    {
        /// The first `keys_size` columns of the block are the key columns.
        const auto columns = ext::map<Columns>(
            ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });

        for (const size_t attribute_idx : ext::range(0, attributes.size()))
        {
            /// The name comparison does not depend on the row. Testing it here, rather than inside
            /// the row loop, avoids re-serializing and re-probing every key of the block once per
            /// attribute; the set of keys that end up marked is the same.
            if (attribute.name != attribute_name_by_index.at(attribute_idx))
                continue;

            Arena pool;
            StringRefs keys_temp(keys_size);
            const auto columns_size = columns.front()->size();

            for (const auto row_idx : ext::range(0, columns_size))
            {
                const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);

                /// Check before assigning so that keys nobody requested are not inserted.
                if (has_key.has(key))
                {
                    has_key[key] = 1;
                }
            }
        }
    }

    stream->readSuffix();

    for (const auto row : ext::range(0, rows))
    {
        out[row] = has_key[keys[row]];
    }

    query_count.fetch_add(rows, std::memory_order_relaxed);
}
BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
{
return source_ptr->loadAll();

View File

@ -12,14 +12,13 @@
#include <ext/range.h>
#include <ext/size.h>
#include <ext/map.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyDirectDictionary final : public IDictionaryBase
{
@ -60,78 +59,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// Declares one `get<TYPE>` member per listed numeric/decimal type.
/// Each overload takes the attribute name, the key columns together with their types,
/// and an output array `out` of type ResultArrayType<TYPE>. This overload set has no
/// explicit default-value argument.
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
/// Declares one `get<TYPE>` member per listed numeric/decimal type that additionally
/// receives `def`, an array of default values (PaddedPODArray<TYPE>), next to the
/// attribute name, key columns, key types and the output array `out`.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
/// Declares one `get<TYPE>` member per listed numeric/decimal type that receives a single
/// default value `def` of type TYPE (passed by value), next to the attribute name,
/// key columns, key types and the output array `out`.
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -142,6 +79,8 @@ private:
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -168,27 +107,21 @@ private:
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
template <typename Pool>
StringRef placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsStringImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
@ -197,9 +130,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;

View File

@ -1,6 +1,10 @@
#include "ComplexKeyHashedDictionary.h"
#include <ext/map.h>
#include <ext/range.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
@ -32,216 +36,111 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
calculateBytesAllocated();
}
/// Defines ComplexKeyHashedDictionary::get<TYPE> for every listed numeric/decimal type.
/// Each generated method validates the key types, looks up the attribute by name and checks
/// that its underlying type is ut<TYPE>, then calls getItemsImpl with a setter that writes
/// `out[row]` and a default getter that returns the attribute's stored null value.
/// `out` is indexed directly and is not resized by the generated code.
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
/// Defines ComplexKeyHashedDictionary::get<TYPE> with per-row defaults for every listed
/// numeric/decimal type. Each generated method validates the key types, looks up the
/// attribute by name and checks that its underlying type is ut<TYPE>, then calls
/// getItemsImpl with a setter that writes `out[row]` and a default getter that returns
/// `def[row]`. Both `out` and `def` are indexed by row and are not resized here.
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
ColumnPtr ComplexKeyHashedDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
auto keys_size = key_columns.front()->size();
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t row, const StringRef value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
auto& out = result->getData();
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utString:
has<StringRef>(attribute, key_columns, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, key_columns, out);
break;
}
has<ValueType>(attribute, key_columns, out);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void ComplexKeyHashedDictionary::createAttributes()
@ -252,7 +151,7 @@ void ComplexKeyHashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@ -407,66 +306,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = map_ref.getBufferSizeInCells();
}
/// Size accounting for a String attribute.
/// The values live in a ContainerType<StringRef> map, and the attribute also owns
/// `string_arena`, so both the map and the arena are added to `bytes_allocated`.
/// `bucket_count` is set from the map's cell count.
template <>
void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    using StringMap = ContainerType<StringRef>;
    const auto & string_map = std::get<StringMap>(attribute.maps);

    bucket_count = string_map.getBufferSizeInCells();

    bytes_allocated += sizeof(StringMap) + string_map.getBufferSizeInBytes();
    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}
void ComplexKeyHashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
bytes_allocated += keys_pool.size();
@ -479,73 +342,41 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
attribute.maps.emplace<ContainerType<T>>();
}
ComplexKeyHashedDictionary::Attribute
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
attribute.maps.emplace<ContainerType<StringRef>>();
}
switch (type)
ComplexKeyHashedDictionary::Attribute
ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
{
auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
attr.maps.emplace<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void ComplexKeyHashedDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
@ -560,7 +391,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
const auto it = attr.find(key);
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
if (it)
{
set_value(i, static_cast<OutputType>(it->getMapped()), false);
}
else
{
if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
set_value(i, default_value_extractor[i], true);
else
set_value(i, default_value_extractor[i], false);
}
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -578,51 +420,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
return pair.second;
}
/// String specialization: copies the bytes of `value` into the attribute's `string_arena`
/// and stores a StringRef to that copy through the StringRef overload, whose result is returned.
template <>
bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
{
    const auto length = value.size();
    const auto * arena_copy = attribute.string_arena->insert(value.data(), length);

    const StringRef stored_value{arena_copy, length};
    return setAttributeValueImpl<StringRef>(attribute, key, stored_value);
}
bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
{
switch (attribute.type)
bool result = false;
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if (attribute.is_nullable)
{
auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
return pair.second;
if (value.isNull())
{
attribute.nullable_set->insert(key);
result = true;
return;
}
else
{
attribute.nullable_set->erase(key);
}
}
}
return {};
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
@ -673,6 +506,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
const auto it = attr.find(key);
out[i] = static_cast<bool>(it);
if (attribute.is_nullable && !out[i])
out[i] = attribute.nullable_set->find(key) != nullptr;
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
}
@ -684,41 +520,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
{
const Attribute & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::utUInt8:
return getKeys<UInt8>(attribute);
case AttributeUnderlyingType::utUInt16:
return getKeys<UInt16>(attribute);
case AttributeUnderlyingType::utUInt32:
return getKeys<UInt32>(attribute);
case AttributeUnderlyingType::utUInt64:
return getKeys<UInt64>(attribute);
case AttributeUnderlyingType::utUInt128:
return getKeys<UInt128>(attribute);
case AttributeUnderlyingType::utInt8:
return getKeys<Int8>(attribute);
case AttributeUnderlyingType::utInt16:
return getKeys<Int16>(attribute);
case AttributeUnderlyingType::utInt32:
return getKeys<Int32>(attribute);
case AttributeUnderlyingType::utInt64:
return getKeys<Int64>(attribute);
case AttributeUnderlyingType::utFloat32:
return getKeys<Float32>(attribute);
case AttributeUnderlyingType::utFloat64:
return getKeys<Float64>(attribute);
case AttributeUnderlyingType::utString:
return getKeys<StringRef>(attribute);
std::vector<StringRef> result;
case AttributeUnderlyingType::utDecimal32:
return getKeys<Decimal32>(attribute);
case AttributeUnderlyingType::utDecimal64:
return getKeys<Decimal64>(attribute);
case AttributeUnderlyingType::utDecimal128:
return getKeys<Decimal128>(attribute);
}
return {};
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
if constexpr (std::is_same_v<AttributeType, String>)
{
result = getKeys<StringRef>(attribute);
}
else
{
result = getKeys<AttributeType>(attribute);
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
template <typename T>
@ -730,12 +551,18 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
for (const auto & key : attr)
keys.push_back(key.getKey());
if (attribute.is_nullable)
{
for (const auto & key: *attribute.nullable_set)
keys.push_back(key.getKey());
}
return keys;
}
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
}

View File

@ -7,17 +7,17 @@
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Core/Block.h>
#include <common/StringRef.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyHashedDictionary final : public IDictionaryBase
{
@ -60,91 +60,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// Declares one `get<TYPE>` member per listed numeric/decimal type.
/// Each overload takes the attribute name, the key columns together with their types,
/// and an output array `out` of type ResultArrayType<TYPE>. This overload set has no
/// explicit default-value argument.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
/// Declares one `get<TYPE>` member per listed numeric/decimal type that additionally
/// receives `def`, an array of default values (PaddedPODArray<TYPE>), next to the
/// attribute name, key columns, key types and the output array `out`.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
/// Declares one `get<TYPE>` member per listed numeric/decimal type that receives a single
/// default value `def` of type TYPE (passed by value), next to the attribute name,
/// key columns, key types and the output array `out`.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -152,9 +77,14 @@ private:
template <typename Value>
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::unique_ptr<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -170,7 +100,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
ContainerType<UInt8>,
@ -206,18 +136,21 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;

View File

@ -25,12 +25,10 @@ namespace ErrorCodes
/* BlockInputStream implementation for external dictionaries
* read() returns blocks consisting of the in-memory contents of the dictionaries
*/
template <typename DictionaryType, typename Key>
template <typename Key>
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
@ -60,111 +58,9 @@ protected:
Block getBlock(size_t start, size_t size) const override;
private:
// pointer types to getXXX functions
// for single key dictionaries
template <typename Type>
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
template <typename Type>
using DictionaryDecimalGetter
= void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
// for complex key dictionaries
template <typename Type>
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
template <typename Type>
using DecimalGetterByKey
= void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
// call getXXX
// for single key dictionaries
template <typename Type, typename Container>
void callGetter(
DictionaryGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container>
void callGetter(
DictionaryDecimalGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container>
void callGetter(
DictionaryStringGetter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
// for complex key dictionaries
template <typename Type, typename Container>
void callGetter(
GetterByKey<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container>
void callGetter(
DecimalGetterByKey<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container>
void callGetter(
StringGetterByKey getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
Block
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Getter>
ColumnPtr getColumnFromStringAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
void fillKeyColumns(
@ -174,65 +70,54 @@ private:
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & columns) const;
DictionaryPtr dictionary;
std::shared_ptr<const IDictionaryBase> dictionary;
Names column_names;
PaddedPODArray<Key> ids;
ColumnsWithTypeAndName key_columns;
Poco::Logger * logger;
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
FillBlockFunction fill_block_function;
Columns data_columns;
GetColumnsFunction get_key_columns_function;
GetColumnsFunction get_view_columns_function;
enum class DictionaryKeyType
enum class DictionaryInputStreamKeyType
{
Id,
ComplexKey,
Callback
};
DictionaryKeyType key_type;
DictionaryInputStreamKeyType key_type;
};
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, ids(std::move(ids_))
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
, key_type(DictionaryKeyType::Id)
, key_type(DictionaryInputStreamKeyType::Id)
{
}
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_,
UInt64 max_block_size_,
const std::vector<StringRef> & keys,
const Names & column_names_)
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
, key_type(DictionaryKeyType::ComplexKey)
, key_type(DictionaryInputStreamKeyType::ComplexKey)
{
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
}
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_,
UInt64 max_block_size_,
const Columns & data_columns_,
@ -240,24 +125,23 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
GetColumnsFunction && get_key_columns_function_,
GetColumnsFunction && get_view_columns_function_)
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
, data_columns(data_columns_)
, get_key_columns_function(get_key_columns_function_)
, get_view_columns_function(get_view_columns_function_)
, key_type(DictionaryKeyType::Callback)
, get_key_columns_function(std::move(get_key_columns_function_))
, get_view_columns_function(std::move(get_view_columns_function_))
, key_type(DictionaryInputStreamKeyType::Callback)
{
}
template <typename DictionaryType, typename Key>
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
template <typename Key>
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
{
/// TODO: Rewrite
switch (key_type)
{
case DictionaryKeyType::ComplexKey:
case DictionaryInputStreamKeyType::ComplexKey:
{
Columns columns;
ColumnsWithTypeAndName view_columns;
@ -268,16 +152,16 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
columns.emplace_back(column);
view_columns.emplace_back(column, key_column.type, key_column.name);
}
return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
return fillBlock({}, columns, {}, std::move(view_columns));
}
case DictionaryKeyType::Id:
case DictionaryInputStreamKeyType::Id:
{
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
return fillBlock(ids_to_fill, {}, {}, {});
}
case DictionaryKeyType::Callback:
case DictionaryInputStreamKeyType::Callback:
{
Columns columns;
columns.reserve(data_columns.size());
@ -294,102 +178,15 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
columns.push_back(key_column.column);
types.push_back(key_column.type);
}
return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
}
}
throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
}
/// Overload chosen when `getter` is a DictionaryGetter<Type>, i.e. a DictionaryType member function
/// taking (attribute name, ids, PaddedPODArray<Type> &).
/// `keys` and `data_types` are ignored; only the id list is forwarded.
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; results are written into `container`.
(dict.*getter)(attribute.name, ids_to_fill, container);
}
/// Overload chosen when `getter` is a DictionaryDecimalGetter<Type>, i.e. a DictionaryType member function
/// taking (attribute name, ids, DecimalPaddedPODArray<Type> &).
/// `keys` and `data_types` are ignored; only the id list is forwarded.
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryDecimalGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; results are written into `container`.
(dict.*getter)(attribute.name, ids_to_fill, container);
}
/// Overload chosen when `getter` is a DictionaryStringGetter, i.e. a DictionaryType member function
/// taking (attribute name, ids, ColumnString *).
/// `keys` and `data_types` are ignored; only the id list is forwarded.
template <typename DictionaryType, typename Key>
template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryStringGetter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; `container` is passed through as the output argument.
(dict.*getter)(attribute.name, ids_to_fill, container);
}
/// Overload chosen when `getter` is a GetterByKey<Type>, i.e. a DictionaryType member function
/// taking (attribute name, key columns, key types, PaddedPODArray<Type> &).
/// `ids_to_fill` is ignored; the key columns and their types are forwarded instead.
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
GetterByKey<Type> getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; results are written into `container`.
(dict.*getter)(attribute.name, keys, data_types, container);
}
/// Overload chosen when `getter` is a DecimalGetterByKey<Type>, i.e. a DictionaryType member function
/// taking (attribute name, key columns, key types, DecimalPaddedPODArray<Type> &).
/// `ids_to_fill` is ignored; the key columns and their types are forwarded instead.
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DecimalGetterByKey<Type> getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; results are written into `container`.
(dict.*getter)(attribute.name, keys, data_types, container);
}
/// Overload chosen when `getter` is a StringGetterByKey, i.e. a DictionaryType member function
/// taking (attribute name, key columns, key types, ColumnString *).
/// `ids_to_fill` is ignored; the key columns and their types are forwarded instead.
template <typename DictionaryType, typename Key>
template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
StringGetterByKey getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
/// Pointer-to-member call on the dictionary; `container` is passed through as the output argument.
(dict.*getter)(attribute.name, keys, data_types, container);
}
template <typename DictionaryType, typename Key>
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
template <typename Key>
Block DictionaryBlockInputStream<Key>::fillBlock(
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
{
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
@ -408,9 +205,14 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
block_columns.push_back(column);
const DictionaryStructure & structure = dictionary->getStructure();
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
if (structure.id && names.find(structure.id->name) != names.end())
block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
{
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
}
auto dictionary_key_type = dictionary->getKeyType();
for (const auto idx : ext::range(0, structure.attributes.size()))
{
@ -418,126 +220,35 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
switch (attribute.underlying_type)
if (dictionary_key_type == DictionaryKeyType::simple)
{
case AttributeUnderlyingType::utUInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::utUInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::utUInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::utUInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::utUInt128:
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
break;
case AttributeUnderlyingType::utInt8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::utInt16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::utInt32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::utInt64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::utFloat32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::utFloat64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::utDecimal32:
{
column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
&DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utDecimal64:
{
column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
&DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utDecimal128:
{
column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
&DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utString:
{
column = getColumnFromStringAttribute<StringGetter>(
&DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
column = dictionary->getColumn(
attribute.name,
attribute.type,
{ids_column},
{std::make_shared<DataTypeUInt64>()},
nullptr /* default_values_column */);
}
#undef GET_COLUMN_FORM_ATTRIBUTE
else
{
column = dictionary->getColumn(
attribute.name,
attribute.type,
keys,
data_types,
nullptr /* default_values_column*/);
}
block_columns.emplace_back(column, attribute.type, attribute.name);
}
}
return Block(block_columns);
}
/// Creates a result column for a numeric or decimal attribute and lets the matching callGetter overload fill it.
/// The column is pre-sized to the number of requested rows before the getter runs.
template <typename DictionaryType, typename Key>
template <typename AttributeType, typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
    Getter getter,
    const PaddedPODArray<Key> & ids_to_fill,
    const Columns & keys,
    const DataTypes & data_types,
    const DictionaryAttribute & attribute,
    const DictionaryType & dict) const
{
    /// Row count comes from the first key column when key columns are given, otherwise from the id list.
    const auto rows = keys.empty() ? ids_to_fill.size() : keys.front()->size();

    if constexpr (IsDecimalNumber<AttributeType>)
    {
        auto decimal_column = ColumnDecimal<AttributeType>::create(rows, 0); /// NOTE: There's wrong scale here, but it's unused.
        callGetter(getter, ids_to_fill, keys, data_types, decimal_column->getData(), attribute, dict);
        return decimal_column;
    }
    else
    {
        auto numeric_column = ColumnVector<AttributeType>::create(rows);
        callGetter(getter, ids_to_fill, keys, data_types, numeric_column->getData(), attribute, dict);
        return numeric_column;
    }
}
/// Creates an empty ColumnString and lets the matching callGetter overload fill it for a String attribute.
template <typename DictionaryType, typename Key>
template <typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
    Getter getter,
    const PaddedPODArray<Key> & ids_to_fill,
    const Columns & keys,
    const DataTypes & data_types,
    const DictionaryAttribute & attribute,
    const DictionaryType & dict) const
{
    auto result = ColumnString::create();

    /// callGetter takes its container by non-const reference, so the raw pointer must be a named lvalue.
    auto raw_output = result.get();
    callGetter(getter, ids_to_fill, keys, data_types, raw_output, attribute, dict);

    return result;
}
template <typename DictionaryType, typename Key>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
template <typename Key>
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
{
auto column_vector = ColumnVector<UInt64>::create();
column_vector->getData().reserve(ids_to_fill.size());
@ -547,8 +258,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
}
template <typename DictionaryType, typename Key>
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
template <typename Key>
void DictionaryBlockInputStream<Key>::fillKeyColumns(
const std::vector<StringRef> & keys,
size_t start,
size_t size,

View File

@ -0,0 +1,149 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryStructure.h"
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
}
/**
* In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
* DictionaryValueType therefore maps String to StringRef and leaves every other attribute type unchanged.
*/
template <typename DictionaryAttributeType>
using DictionaryValueType =
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
/**
 * Used to create a column with the right type for DictionaryAttributeType:
 * ColumnString for String, ColumnDecimal<T> for decimal types, ColumnVector<T> otherwise.
 */
template <typename DictionaryAttributeType>
class DictionaryAttributeColumnProvider
{
public:
    using ColumnType =
        std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
            std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
                ColumnVector<DictionaryAttributeType>>>;

    using ColumnPtr = typename ColumnType::MutablePtr;

    /** Creates a mutable column for `dictionary_attribute`.
      * String: an empty column (`size` is not used).
      * Decimal: `size` rows, with the scale taken from the attribute's nested type.
      * Other numbers: `size` rows.
      * Any other attribute type throws TYPE_MISMATCH.
      */
    static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
    {
        /// One if-constexpr chain: exactly one branch is instantiated per attribute type,
        /// so the String instantiation no longer carries a dead `throw` after its return.
        if constexpr (std::is_same_v<DictionaryAttributeType, String>)
        {
            return ColumnType::create();
        }
        else if constexpr (IsDecimalNumber<DictionaryAttributeType>)
        {
            auto scale = getDecimalScale(*dictionary_attribute.nested_type);
            return ColumnType::create(size, scale);
        }
        else if constexpr (IsNumber<DictionaryAttributeType>)
            return ColumnType::create(size);
        else
            throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
    }
};
/**
* DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
* Provides interface for getting default value with operator[];
*
* If default_values_column is null then attribute_default_value will be used.
* If default_values_column is not null in the constructor, then this column's values will be used as default values.
*/
template <typename DictionaryAttributeType>
class DictionaryDefaultValueExtractor
{
    using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;

public:
    using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;

    /// `attribute_default_value` is used for every row unless `default_values_column_` overrides it:
    /// a full column supplies per-row defaults, a constant column replaces the single default value.
    /// Throws TYPE_MISMATCH when the column is neither of those for this attribute type.
    DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
        : default_value(std::move(attribute_default_value))
    {
        /// No column supplied: keep the attribute-level default for all rows.
        if (default_values_column_ == nullptr)
            return;

        const auto * full_column = checkAndGetColumn<DefaultColumnType>(*default_values_column_);
        if (full_column)
        {
            default_values_column = full_column;
            use_default_value_from_column = true;
            return;
        }

        const auto * constant_column = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get());
        if (!constant_column)
            throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};

        /// A constant column carries one value, so it simply replaces the stored default.
        default_value = constant_column->template getValue<DictionaryAttributeType>();
    }

    /// Returns the default value to use for `row`.
    DefaultValueType operator[](size_t row)
    {
        if (use_default_value_from_column)
        {
            assert(default_values_column != nullptr);

            if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
                return default_values_column->getDataAt(row);
            else
                return default_values_column->getData()[row];
        }

        return static_cast<DefaultValueType>(default_value);
    }

private:
    DictionaryAttributeType default_value;
    /// Raw pointer into the column given to the constructor; the ColumnPtr itself is not stored here.
    const DefaultColumnType * default_values_column = nullptr;
    bool use_default_value_from_column = false;
};
/**
* Returns ColumnVector data as PaddedPodArray.
* If column is constant parameter backup_storage is used to store values.
*/
template <typename T>
static const PaddedPODArray<T> & getColumnVectorData(
    const IDictionaryBase * dictionary,
    const ColumnPtr column,
    PaddedPODArray<T> & backup_storage)
{
    const bool column_was_const = isColumnConst(*column);
    const auto materialized = column->convertToFullColumnIfConst();
    const auto * typed_column = checkAndGetColumn<ColumnVector<T>>(materialized.get());

    if (typed_column == nullptr)
    {
        throw Exception{ErrorCodes::TYPE_MISMATCH,
            "{}: type mismatch: column has wrong type expected {}",
            dictionary->getDictionaryID().getNameForLogs(),
            TypeName<T>::get()};
    }

    /// A non-constant column can hand out its own data directly.
    if (!column_was_const)
        return typed_column->getData();

    /// The materialized copy of a constant column is local to this function,
    /// so its values are copied into the caller-provided storage before returning.
    backup_storage.assign(typed_column->getData());
    return backup_storage;
}
}

View File

@ -2,6 +2,8 @@
#include <Columns/IColumn.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
@ -12,7 +14,6 @@
#include <unordered_set>
#include <ext/range.h>
namespace DB
{
namespace ErrorCodes
@ -41,54 +42,46 @@ namespace
}
AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
{
static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary
auto type_index = type->getTypeId();
switch (type_index)
{
{"UInt8", AttributeUnderlyingType::utUInt8},
{"UInt16", AttributeUnderlyingType::utUInt16},
{"UInt32", AttributeUnderlyingType::utUInt32},
{"UInt64", AttributeUnderlyingType::utUInt64},
{"UUID", AttributeUnderlyingType::utUInt128},
{"Int8", AttributeUnderlyingType::utInt8},
{"Int16", AttributeUnderlyingType::utInt16},
{"Int32", AttributeUnderlyingType::utInt32},
{"Int64", AttributeUnderlyingType::utInt64},
{"Float32", AttributeUnderlyingType::utFloat32},
{"Float64", AttributeUnderlyingType::utFloat64},
{"String", AttributeUnderlyingType::utString},
{"Date", AttributeUnderlyingType::utUInt16},
};
case TypeIndex::UInt8: return AttributeUnderlyingType::utUInt8;
case TypeIndex::UInt16: return AttributeUnderlyingType::utUInt16;
case TypeIndex::UInt32: return AttributeUnderlyingType::utUInt32;
case TypeIndex::UInt64: return AttributeUnderlyingType::utUInt64;
case TypeIndex::UInt128: return AttributeUnderlyingType::utUInt128;
const auto it = dictionary.find(type);
if (it != std::end(dictionary))
return it->second;
case TypeIndex::Int8: return AttributeUnderlyingType::utInt8;
case TypeIndex::Int16: return AttributeUnderlyingType::utInt16;
case TypeIndex::Int32: return AttributeUnderlyingType::utInt32;
case TypeIndex::Int64: return AttributeUnderlyingType::utInt64;
/// Can contain arbitrary scale and timezone parameters.
if (type.find("DateTime64") == 0)
return AttributeUnderlyingType::utUInt64;
case TypeIndex::Float32: return AttributeUnderlyingType::utFloat32;
case TypeIndex::Float64: return AttributeUnderlyingType::utFloat64;
/// Can contain arbitrary timezone as parameter.
if (type.find("DateTime") == 0)
return AttributeUnderlyingType::utUInt32;
case TypeIndex::Decimal32: return AttributeUnderlyingType::utDecimal32;
case TypeIndex::Decimal64: return AttributeUnderlyingType::utDecimal64;
case TypeIndex::Decimal128: return AttributeUnderlyingType::utDecimal128;
if (type.find("Decimal") == 0)
{
size_t start = strlen("Decimal");
if (type.find("32", start) == start)
return AttributeUnderlyingType::utDecimal32;
if (type.find("64", start) == start)
return AttributeUnderlyingType::utDecimal64;
if (type.find("128", start) == start)
return AttributeUnderlyingType::utDecimal128;
case TypeIndex::Date: return AttributeUnderlyingType::utUInt16;
case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32;
case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64;
case TypeIndex::UUID: return AttributeUnderlyingType::utUInt128;
case TypeIndex::String: return AttributeUnderlyingType::utString;
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
// TODO: This should be fixed by fully supporting arrays in dictionaries.
case TypeIndex::Array: return AttributeUnderlyingType::utString;
default: break;
}
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
// TODO: This should be fixed by fully supporting arrays in dictionaries.
if (type.find("Array") == 0)
return AttributeUnderlyingType::utString;
throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
/// The trailing space keeps the type name from running into the message text.
throw Exception{"Unknown type for dictionary " + type->getName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -215,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
for (const auto i : ext::range(0, key_types.size()))
{
const auto & expected_type = (*key)[i].type->getName();
const auto & actual_type = key_types[i]->getName();
const auto & expected_type = (*key)[i].type;
const auto & actual_type = key_types[i];
if (expected_type != actual_type)
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
+ actual_type,
ErrorCodes::TYPE_MISMATCH};
if (!areTypesEqual(expected_type, actual_type))
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
+ actual_type->getName(),
ErrorCodes::TYPE_MISMATCH};
}
}
/// Looks up the first attribute named `attribute_name` and checks that its type equals `type`.
/// Throws BAD_ARGUMENTS when no attribute has that name and TYPE_MISMATCH when the types differ.
const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
{
    for (const auto & candidate : attributes)
    {
        if (candidate.name != attribute_name)
            continue;

        if (areTypesEqual(candidate.type, type))
            return candidate;

        throw Exception{"Attribute type does not match, expected " + candidate.type->getName() + ", found " + type->getName(),
            ErrorCodes::TYPE_MISMATCH};
    }

    throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}
std::string DictionaryStructure::getKeyDescription() const
{
@ -318,9 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
continue;
const auto type_string = config.getString(prefix + "type");
const auto type = DataTypeFactory::instance().get(type_string);
const auto underlying_type = getAttributeUnderlyingType(type_string);
const auto initial_type = DataTypeFactory::instance().get(type_string);
auto type = initial_type;
bool is_array = false;
bool is_nullable = false;
if (type->isNullable())
{
is_nullable = true;
type = removeNullable(type);
}
const auto underlying_type = getAttributeUnderlyingType(type);
const auto expression = config.getString(prefix + "expression", "");
if (!expression.empty())
@ -333,7 +353,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
try
{
if (null_value_string.empty())
{
null_value = type->getDefault();
}
else
{
ReadBufferFromString null_value_buffer{null_value_string};
@ -365,8 +387,18 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
has_hierarchy = has_hierarchy || hierarchical;
res_attributes.emplace_back(
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
res_attributes.emplace_back(DictionaryAttribute{
name,
underlying_type,
initial_type,
type,
expression,
null_value,
hierarchical,
injective,
is_object_id,
is_nullable,
is_array});
}
return res_attributes;

View File

@ -42,7 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
/// Min and max lifetimes for a dictionary or its entry
using DictionaryLifetime = ExternalLoadableLifetime;
/** Holds the description of a single dictionary attribute:
* - name, used for lookup into dictionary and source;
* - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;
@ -57,13 +56,74 @@ struct DictionaryAttribute final
const std::string name;
const AttributeUnderlyingType underlying_type;
const DataTypePtr type;
const DataTypePtr nested_type;
const std::string expression;
const Field null_value;
const bool hierarchical;
const bool injective;
const bool is_object_id;
const bool is_nullable;
const bool is_array;
};
/// Empty tag type that carries a C++ attribute type as the nested alias `AttributeType`.
/// An instance can be passed to a generic callable, which then recovers the type
/// through `decltype` of its argument.
template <typename T>
struct DictionaryAttributeType
{
    using AttributeType = T;
};
/** Dispatches a runtime AttributeUnderlyingType to a compile-time C++ type.
  *
  * For the enumerator given in `type`, calls `func` exactly once with a default-constructed
  * DictionaryAttributeType<T> tag, where T is the C++ type listed for that enumerator below.
  * The callable is expected to be generic (e.g. a lambda taking `const auto &`) and can obtain T
  * as `typename std::decay_t<decltype(tag)>::AttributeType`.
  *
  * The switch deliberately has no `default` branch: if `type` holds a value that is not one of
  * the listed enumerators, `func` is not called at all.
  */
template <typename F>
void callOnDictionaryAttributeType(AttributeUnderlyingType type, F && func)
{
    switch (type)
    {
        case AttributeUnderlyingType::utUInt8:
            func(DictionaryAttributeType<UInt8>());
            break;
        case AttributeUnderlyingType::utUInt16:
            func(DictionaryAttributeType<UInt16>());
            break;
        case AttributeUnderlyingType::utUInt32:
            func(DictionaryAttributeType<UInt32>());
            break;
        case AttributeUnderlyingType::utUInt64:
            func(DictionaryAttributeType<UInt64>());
            break;
        case AttributeUnderlyingType::utUInt128:
            func(DictionaryAttributeType<UInt128>());
            break;
        case AttributeUnderlyingType::utInt8:
            func(DictionaryAttributeType<Int8>());
            break;
        case AttributeUnderlyingType::utInt16:
            func(DictionaryAttributeType<Int16>());
            break;
        case AttributeUnderlyingType::utInt32:
            func(DictionaryAttributeType<Int32>());
            break;
        case AttributeUnderlyingType::utInt64:
            func(DictionaryAttributeType<Int64>());
            break;
        case AttributeUnderlyingType::utFloat32:
            func(DictionaryAttributeType<Float32>());
            break;
        case AttributeUnderlyingType::utFloat64:
            func(DictionaryAttributeType<Float64>());
            break;
        case AttributeUnderlyingType::utString:
            func(DictionaryAttributeType<String>());
            break;
        case AttributeUnderlyingType::utDecimal32:
            func(DictionaryAttributeType<Decimal32>());
            break;
        case AttributeUnderlyingType::utDecimal64:
            func(DictionaryAttributeType<Decimal64>());
            break;
        case AttributeUnderlyingType::utDecimal128:
            func(DictionaryAttributeType<Decimal128>());
            break;
    }
}
struct DictionarySpecialAttribute final
{
@ -94,10 +154,10 @@ struct DictionaryStructure final
DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
void validateKeyTypes(const DataTypes & key_types) const;
const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
std::string getKeyDescription() const;
bool isKeySizeFixed() const;
size_t getKeySize() const;
private:
/// range_min and range_max have to be parsed before this function call
std::vector<DictionaryAttribute> getAttributes(

View File

@ -3,7 +3,10 @@
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Common/HashTable/HashSet.h>
namespace DB
{
@ -35,11 +38,13 @@ DirectDictionary::DirectDictionary(
void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
[&](const size_t row, const UInt64 value, bool) { out[row] = value; },
extractor);
}
@ -128,395 +133,101 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr DirectDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return String(null_value.data, null_value.size); });
}
auto keys_size = ids.size();
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return def; });
}
void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
}
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
void DirectDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
for (const auto & attribute : dict_struct.attributes)
auto type_call = [&](const auto &dictionary_attribute_type)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attribute_name_by_index.emplace(attributes.size(), attribute.name);
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
if (attribute.hierarchical)
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
hierarchical_attribute = &attributes.back();
auto * out = column.get();
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
}
}
}
template <typename T>
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
}
template <>
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
}
DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
{
Attribute attr{type, {}, {}, attr_name};
switch (type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
HashMap<Key, OutputType> value_by_key;
for (const auto row : ext::range(0, rows))
value_by_key[ids[row]] = get_default(row);
std::vector<Key> to_load;
to_load.reserve(value_by_key.size());
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
to_load.emplace_back(static_cast<Key>(it->getKey()));
auto stream = source_ptr->loadIds(to_load);
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
getItemsImpl<String, String>(
attribute,
ids,
[&](const size_t row, const String value, bool is_null)
{
if (attribute.type == AttributeUnderlyingType::utFloat32)
{
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].get<Float64>());
}
else
{
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>());
}
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
}
}
const auto ref = StringRef{value};
out->insertData(ref.data, ref.size);
},
default_value_extractor);
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
set_value(row, value_by_key[ids[row]]);
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsStringImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
HashMap<Key, String> value_by_key;
for (const auto row : ext::range(0, rows))
value_by_key[ids[row]] = get_default(row);
std::vector<Key> to_load;
to_load.reserve(value_by_key.size());
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
to_load.emplace_back(static_cast<Key>(it->getKey()));
auto stream = source_ptr->loadIds(to_load);
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
else
{
auto & out = column->getData();
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
getItemsImpl<AttributeType, AttributeType>(
attribute,
ids,
[&](const size_t row, const auto value, bool is_null)
{
const String from_source = attribute_column[row_idx].get<String>();
value_by_key[key] = from_source;
}
}
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
set_value(row, value_by_key[ids[row]]);
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name))
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
return attributes[it->second];
}
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
template <typename T>
void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto rows = ext::size(ids);
HashMap<Key, UInt8> has_key;
@ -548,6 +259,137 @@ void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, P
out[row] = has_key[ids[row]];
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
/// Populates `attributes` from `dict_struct.attributes`, recording each attribute's position in
/// both name<->index lookup maps, and remembers the hierarchical attribute (which must be UInt64).
void DirectDictionary::createAttributes()
{
    /// Capacity for every attribute is reserved before the loop; a pointer to an element is stored
    /// below (presumably relying on `attributes` being a std::vector that will not reallocate).
    attributes.reserve(dict_struct.attributes.size());

    for (const auto & dict_attribute : dict_struct.attributes)
    {
        /// Index the attribute is about to occupy in `attributes`.
        const auto position = attributes.size();
        attribute_index_by_name.emplace(dict_attribute.name, position);
        attribute_name_by_index.emplace(position, dict_attribute.name);
        attributes.push_back(createAttribute(dict_attribute, dict_attribute.null_value, dict_attribute.name));

        if (!dict_attribute.hierarchical)
            continue;

        hierarchical_attribute = &attributes.back();
        if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
            throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
    }
}
/// Stores the attribute's default ("null") value: the Field is read as the nearest Field
/// representation of T and converted to T before being assigned to `attribute.null_values`.
template <typename T>
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
    using FieldType = NearestFieldType<T>;
    attribute.null_values = T(null_value.get<FieldType>());
}
/// String specialization: creates a fresh arena for the attribute, copies the default string into
/// it and stores a StringRef pointing at that arena copy in `attribute.null_values`.
template <>
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
    attribute.string_arena = std::make_unique<Arena>();

    const String & default_text = null_value.get<String>();
    const auto length = default_text.size();

    /// The StringRef must reference the arena's copy, not the Field's own storage.
    const char * arena_copy = attribute.string_arena->insert(default_text.data(), length);
    attribute.null_values.emplace<StringRef>(arena_copy, length);
}
/// Creates the runtime Attribute for a dictionary attribute description.
/// The underlying type and nullability are taken from `attribute`, the name from `attr_name`,
/// and the default value is filled in from `null_value` by the type-specific createAttributeImpl.
DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & attr_name)
{
    Attribute result{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};

    /// Resolve the runtime underlying type to a C++ type and initialise the default value with it.
    callOnDictionaryAttributeType(attribute.underlying_type, [&](const auto & type_tag)
    {
        using AttributeType = typename std::decay_t<decltype(type_tag)>::AttributeType;
        createAttributeImpl<AttributeType>(result, null_value);
    });

    return result;
}
/** Fetches the values of one attribute for the given ids directly from the source.
  *
  * For every row of `ids`, `set_value(row, value, is_null)` is called once, where:
  *  - `value` is the value read from the source for that key, or the default obtained from
  *    `default_value_extractor[row]` if the source produced no non-null value for the key;
  *  - `is_null` is true if the source returned a null Field for that key.
  *
  * Throws BAD_ARGUMENTS if `attribute.name` is not registered in `attribute_index_by_name`.
  */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void DirectDictionary::getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto rows = ext::size(ids);
HashMap<Key, OutputType> value_by_key;
HashSet<Key> value_is_null;
/// Seed the map with the default value for each requested key.
/// If the same key occurs in several rows, the default of the last such row is the one kept.
for (const auto row : ext::range(0, rows))
{
auto key = ids[row];
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
}
/// Keys to request from the source: one per entry of value_by_key.
std::vector<Key> to_load;
to_load.reserve(value_by_key.size());
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
to_load.emplace_back(static_cast<Key>(it->getKey()));
auto stream = source_ptr->loadIds(to_load);
stream->readPrefix();
/// Resolve the attribute's index by name; it selects the column to read from each block.
const auto it = attribute_index_by_name.find(attribute.name);
if (it == std::end(attribute_index_by_name))
throw Exception{full_name + ": no such attribute '" + attribute.name + "'", ErrorCodes::BAD_ARGUMENTS};
auto attribute_index = it->second;
while (const auto block = stream->read())
{
/// Column 0 of a block is read as the id column; the attribute's column is at attribute_index + 1.
const IColumn & id_column = *block.safeGetByPosition(0).column;
const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
/// Rows whose key was not requested are ignored.
if (value_by_key.find(key) != value_by_key.end())
{
auto value = attribute_column[row_idx];
/// A null value only marks the key as null; the seeded default stays in value_by_key.
if (value.isNull())
value_is_null.insert(key);
else
value_by_key[key] = static_cast<OutputType>(value.get<NearestFieldType<AttributeType>>());
}
}
}
stream->readSuffix();
/// Report the result for every input row, in input order.
for (const auto row : ext::range(0, rows))
{
auto key = ids[row];
set_value(row, value_by_key[key], value_is_null.find(key) != nullptr);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
/// Returns the runtime attribute registered under `attribute_name`.
/// Throws BAD_ARGUMENTS if no attribute with that name is present in `attribute_index_by_name`.
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
{
    if (const auto found = attribute_index_by_name.find(attribute_name); found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}

View File

@ -13,11 +13,10 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class DirectDictionary final : public IDictionary
{
@ -65,76 +64,16 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -142,6 +81,7 @@ private:
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -168,23 +108,17 @@ private:
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsStringImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
@ -193,9 +127,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
Key getValueOrNullByKey(const Key & to_find) const;
template <typename ChildType, typename AncestorType>

View File

@ -1,9 +1,14 @@
#include "FlatDictionary.h"
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
namespace DB
{
@ -44,12 +49,13 @@ FlatDictionary::FlatDictionary(
void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
extractor);
}
@ -102,186 +108,103 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr FlatDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto size = ids.size();
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
FlatDictionary::getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, ids, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
ids,
[&](const size_t row, const auto value) { out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.nullable_set)
{
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
for (size_t row = 0; row < ids.size(); ++row)
{
auto id = ids[row];
if (attribute.nullable_set->find(id) != nullptr)
vec_null_map_to[row] = true;
}
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
/// Returns a UInt8 column with one flag per key: non-zero when the id lies inside `loaded_ids`
/// and is marked there, zero otherwise. Only the first key column is read; key types are ignored.
ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
    PaddedPODArray<Key> backup_storage;
    const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);

    auto presence_column = ColumnUInt8::create(ext::size(keys));
    auto & presence = presence_column->getData();

    const auto keys_count = ext::size(keys);

    for (const auto row : ext::range(0, keys_count))
    {
        const auto key = keys[row];

        /// The bounds check must come first: ids beyond the loaded range are reported as absent.
        presence[row] = key < loaded_ids.size() && loaded_ids[key];
    }

    /// Every inspected key counts as one query.
    query_count.fetch_add(keys_count, std::memory_order_relaxed);

    return presence_column;
}
void FlatDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
@ -290,7 +213,7 @@ void FlatDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
{
@ -416,6 +339,14 @@ void FlatDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = array_ref.capacity();
}
/// String specialization of the size accounting: counts the array of StringRef entries
/// and the arena attached to the attribute, and records the array capacity as the bucket count.
template <>
void FlatDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    const auto & string_refs = std::get<ContainerType<StringRef>>(attribute.arrays);

    bucket_count = string_refs.capacity();

    /// The container object plus the memory it has allocated for the references.
    bytes_allocated += sizeof(PaddedPODArray<StringRef>) + string_refs.allocated_bytes();

    /// The arena object plus the size it reports.
    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}
void FlatDictionary::calculateBytesAllocated()
{
@ -423,60 +354,15 @@ void FlatDictionary::calculateBytesAllocated()
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -500,67 +386,31 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
}
FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}};
switch (type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
createAttributeImpl<AttributeType>(attr, null_value);
};
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void FlatDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
const auto rows = ext::size(ids);
@ -568,7 +418,7 @@ void FlatDictionary::getItemsImpl(
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row));
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : default_value_extractor[row]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
@ -592,7 +442,6 @@ void FlatDictionary::resize(Attribute & attribute, const Key id)
template <typename T>
void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value)
{
resize<T>(attribute, id);
auto & array = std::get<ContainerType<T>>(attribute.arrays);
array[id] = value;
loaded_ids[id] = true;
@ -601,64 +450,38 @@ void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id,
template <>
void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & value)
{
resize<StringRef>(attribute, id);
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
auto & array = std::get<ContainerType<StringRef>>(attribute.arrays);
array[id] = StringRef{string_in_arena, value.size()};
loaded_ids[id] = true;
setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()});
}
void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt16:
setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt32:
setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt64:
setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt128:
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
break;
case AttributeUnderlyingType::utInt8:
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt16:
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt32:
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt64:
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utFloat32:
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::utFloat64:
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::utString:
setAttributeValueImpl<String>(attribute, id, value.get<String>());
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ResizeType = std::conditional_t<std::is_same_v<AttributeType, String>, StringRef, AttributeType>;
case AttributeUnderlyingType::utDecimal32:
setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
break;
case AttributeUnderlyingType::utDecimal64:
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
break;
case AttributeUnderlyingType::utDecimal128:
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
break;
}
resize<ResizeType>(attribute, id);
if (attribute.nullable_set)
{
if (value.isNull())
{
attribute.nullable_set->insert(id);
loaded_ids[id] = true;
return;
}
else
{
attribute.nullable_set->erase(id);
}
}
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
@ -671,27 +494,13 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
return attributes[it->second];
}
/// Fills `out[row]` with a flag telling whether `ids[row]` lies inside `loaded_ids` and is marked there.
/// The attribute argument is unused; T only selects the instantiation.
template <typename T>
void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
    const auto total_keys = ext::size(ids);

    for (const auto row : ext::range(0, total_keys))
    {
        const auto key = ids[row];

        /// Bounds check first, so ids past the end of `loaded_ids` are reported as absent.
        out[row] = key < loaded_ids.size() && loaded_ids[key];
    }

    /// Every inspected id counts as one query.
    query_count.fetch_add(total_keys, std::memory_order_relaxed);
}
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
{
const auto ids_count = ext::size(loaded_ids);
PaddedPODArray<Key> ids;
ids.reserve(ids_count);
for (auto idx : ext::range(0, ids_count))
if (loaded_ids[idx])
ids.push_back(idx);
@ -700,7 +509,7 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}

View File

@ -3,20 +3,25 @@
#include <atomic>
#include <variant>
#include <vector>
#include <optional>
#include <Common/HashTable/HashSet.h>
#include <Common/Arena.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Columns/ColumnArray.h>
#include <DataTypes/IDataType.h>
#include <Core/Block.h>
#include <ext/range.h>
#include <ext/size.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class FlatDictionary final : public IDictionary
{
@ -66,76 +71,16 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -143,9 +88,13 @@ private:
template <typename Value>
using ContainerType = PaddedPODArray<Value>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::optional<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -180,6 +129,7 @@ private:
ContainerType<Float64>,
ContainerType<StringRef>>
arrays;
std::unique_ptr<Arena> string_arena;
};
@ -194,13 +144,16 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
@ -212,9 +165,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
template <typename ChildType, typename AncestorType>
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;

View File

@ -4,7 +4,10 @@
#include "DictionaryFactory.h"
#include "ClickHouseDictionarySource.h"
#include <Core/Defines.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
namespace
{
@ -57,12 +60,13 @@ HashedDictionary::HashedDictionary(
void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
extractor);
}
@ -125,183 +129,105 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
isInImpl(child_id, ancestor_ids, out);
}
/// Generates HashedDictionary::get<TYPE>() without an explicit default argument.
/// Each generated method looks the attribute up by name, calls checkAttributeType with the
/// expected ut<TYPE> tag, reads the TYPE alternative of attribute.null_values, and passes two
/// lambdas to getItemsImpl: one that stores a value at its row of `out`, one that returns that null value.
/// NOTE(review): getItemsImpl is not visible here; presumably the second lambda is used for ids that are not found — confirm.
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
/// Emit one getter per supported integer, floating point and decimal type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
/// The helper macro is only needed for the list above.
#undef DECLARE
void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr HashedDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto size = ids.size();
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
ids,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.nullable_set)
{
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
for (size_t row = 0; row < ids.size(); ++row)
{
auto id = ids[row];
if (attribute.nullable_set->find(id) != nullptr)
vec_null_map_to[row] = true;
}
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
/// Generates HashedDictionary::get<TYPE>() with a per-row default array.
/// Each generated method looks the attribute up by name, calls checkAttributeType with the
/// expected ut<TYPE> tag, and passes two lambdas to getItemsImpl: one that stores a value at
/// its row of `out`, one that returns `def[row]` for the requested row.
/// NOTE(review): `def` is indexed by row, so it presumably must have at least as many entries as `ids` — confirm against callers.
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
/// Emit one getter per supported integer, floating point and decimal type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
/// The helper macro is only needed for the list above.
#undef DECLARE
void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
ColumnUInt8::Ptr HashedDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
size_t ids_count = ext::size(ids);
/// Generates HashedDictionary::get<TYPE>() with a single constant default value.
/// Each generated method looks the attribute up by name, calls checkAttributeType with the
/// expected ut<TYPE> tag, and passes two lambdas to getItemsImpl: one that stores a value at
/// its row of `out`, one that returns `def` regardless of the row.
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
/// Emit one getter per supported integer, floating point and decimal type.
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
/// The helper macro is only needed for the list above.
#undef DECLARE
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<StringRef>(attribute, ids, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
has<AttributeType>(attribute, ids, out);
};
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
}
callOnDictionaryAttributeType(attribute.type, type_call);
query_count.fetch_add(ids_count, std::memory_order_relaxed);
return result;
}
void HashedDictionary::createAttributes()
@ -312,7 +238,7 @@ void HashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
{
@ -429,6 +355,13 @@ void HashedDictionary::resize(Attribute & attribute, size_t added_rows)
map_ref->resize(added_rows);
}
}
/// String specialization of the per-attribute resize: it simply forwards to the StringRef
/// instantiation, so callers that dispatch on the attribute type String end up resizing
/// the StringRef-keyed storage.
template <>
void HashedDictionary::resize<String>(Attribute & attribute, size_t added_rows)
{
resize<StringRef>(attribute, added_rows);
}
void HashedDictionary::resize(size_t added_rows)
{
if (!added_rows)
@ -436,56 +369,14 @@ void HashedDictionary::resize(size_t added_rows)
for (auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
resize<UInt8>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt16:
resize<UInt16>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt32:
resize<UInt32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt64:
resize<UInt64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt128:
resize<UInt128>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt8:
resize<Int8>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt16:
resize<Int16>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt32:
resize<Int32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt64:
resize<Int64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utFloat32:
resize<Float32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utFloat64:
resize<Float64>(attribute, added_rows);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
resize<AttributeType>(attribute, added_rows);
};
case AttributeUnderlyingType::utDecimal32:
resize<Decimal32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utDecimal64:
resize<Decimal64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utDecimal128:
resize<Decimal128>(attribute, added_rows);
break;
case AttributeUnderlyingType::utString:
resize<StringRef>(attribute, added_rows);
break;
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -562,66 +453,27 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute)
}
}
/// String specialization of the size accounting: reuses the StringRef instantiation for the
/// attribute's container and then adds the arena object plus the size reported by string_arena.
template <>
void HashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
/// Account for the StringRef container first.
addAttributeSize<StringRef>(attribute);
/// Then add the arena attached to this attribute.
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}
void HashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -635,93 +487,66 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field &
attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
}
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
switch (type)
if (!sparse)
attribute.maps = std::make_unique<CollectionType<StringRef>>();
else
attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
}
HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
auto type_call = [&, this](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
if (!sparse)
attr.maps = std::make_unique<CollectionType<StringRef>>();
else
attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
void HashedDictionary::getItemsAttrImpl(
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const MapType & attr,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto rows = ext::size(ids);
for (const auto i : ext::range(0, rows))
{
const auto it = attr.find(ids[i]);
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : default_value_extractor[i]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void HashedDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
if (!sparse)
return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, default_value_extractor);
}
@ -740,58 +565,41 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
}
}
/// String specialization: inserts the value's bytes into the attribute's string arena and
/// stores a StringRef to the arena data through the StringRef specialization.
/// Returns whatever the StringRef specialization returns.
template <>
bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
{
    const auto length = value.size();
    const char * arena_data = attribute.string_arena->insert(value.data(), length);
    const StringRef stored_value{arena_data, length};

    return setAttributeValueImpl<StringRef>(attribute, id, stored_value);
}
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
{
switch (attribute.type)
bool result = false;
auto type_call = [&, this](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if (attribute.nullable_set)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
if (!sparse)
if (value.isNull())
{
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
result = attribute.nullable_set->insert(id).second;
return;
}
else
{
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
attribute.nullable_set->erase(id);
}
}
}
throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
@ -810,9 +618,18 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
const auto rows = ext::size(ids);
for (const auto i : ext::range(0, rows))
{
out[i] = attr.find(ids[i]) != nullptr;
query_count.fetch_add(rows, std::memory_order_relaxed);
if (attribute.nullable_set && !out[i])
out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
}
}
/// String specialization: forwards to the StringRef specialization with the same arguments.
template <>
void HashedDictionary::has<String>(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
has<StringRef>(attribute, ids, out);
}
template <typename T, typename AttrType>
@ -833,50 +650,39 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
}
/// String specialization: returns the ids collected by the StringRef specialization.
template <>
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
{
return getIds<StringRef>(attribute);
}
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{
const auto & attribute = attributes.front();
PaddedPODArray<HashedDictionary::Key> result;
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return getIds<UInt8>(attribute);
case AttributeUnderlyingType::utUInt16:
return getIds<UInt16>(attribute);
case AttributeUnderlyingType::utUInt32:
return getIds<UInt32>(attribute);
case AttributeUnderlyingType::utUInt64:
return getIds<UInt64>(attribute);
case AttributeUnderlyingType::utUInt128:
return getIds<UInt128>(attribute);
case AttributeUnderlyingType::utInt8:
return getIds<Int8>(attribute);
case AttributeUnderlyingType::utInt16:
return getIds<Int16>(attribute);
case AttributeUnderlyingType::utInt32:
return getIds<Int32>(attribute);
case AttributeUnderlyingType::utInt64:
return getIds<Int64>(attribute);
case AttributeUnderlyingType::utFloat32:
return getIds<Float32>(attribute);
case AttributeUnderlyingType::utFloat64:
return getIds<Float64>(attribute);
case AttributeUnderlyingType::utString:
return getIds<StringRef>(attribute);
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
/// TODO: Check if order is satisfied
result = getIds<AttributeType>(attribute);
case AttributeUnderlyingType::utDecimal32:
return getIds<Decimal32>(attribute);
case AttributeUnderlyingType::utDecimal64:
return getIds<Decimal64>(attribute);
case AttributeUnderlyingType::utDecimal128:
return getIds<Decimal128>(attribute);
}
return PaddedPODArray<Key>();
if (attribute.nullable_set)
{
for (const auto& value: *attribute.nullable_set)
result.push_back(value.getKey());
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}

View File

@ -3,15 +3,18 @@
#include <atomic>
#include <memory>
#include <variant>
#include <optional>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Core/Block.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <sparsehash/sparse_hash_map>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
/** This dictionary stores all content in a hash table in memory
* (a separate Key -> Value map for each attribute)
@ -20,7 +23,6 @@
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class HashedDictionary final : public IDictionary
{
@ -66,77 +68,16 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
/// Reports that this dictionary uses keys of the DictionaryKeyType::simple kind.
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@ -162,9 +103,13 @@ private:
template <typename Value>
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::optional<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -180,7 +125,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
CollectionPtrType<UInt8>,
@ -235,14 +180,21 @@ private:
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsAttrImpl(
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
const MapType & attr,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);

View File

@ -10,6 +10,8 @@
#include <common/StringRef.h>
#include "IDictionarySource.h"
#include <Dictionaries/DictionaryStructure.h>
#include <DataTypes/IDataType.h>
#include <Columns/ColumnsNumber.h>
#include <chrono>
#include <memory>
@ -20,15 +22,31 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int TYPE_MISMATCH;
}
struct IDictionaryBase;
using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
struct DictionaryStructure;
class ColumnString;
/** DictionaryKeyType tells IDictionary clients which kind of key
* a dictionary supports.
*
* Simple: the dictionary supports a UInt64 key column.
*
* Complex: the dictionary supports any combination of key columns.
*
* Range: the dictionary supports a UInt64 key column combined with
* a range key column of a numerically representable type.
*/
enum class DictionaryKeyType
{
simple,
complex,
range
};
/**
* Base class for Dictionaries implementation.
*/
struct IDictionaryBase : public IExternalLoadable
{
using Key = UInt64;
@ -85,6 +103,33 @@ struct IDictionaryBase : public IExternalLoadable
virtual bool isInjective(const std::string & attribute_name) const = 0;
/** Subclass must report the key type that the dictionary supports.
* Clients use that key type to provide valid key columns for the `getColumn` and `hasKeys` functions.
*/
virtual DictionaryKeyType getKeyType() const = 0;
/** Subclass must validate the key columns and key types
* and return the column representation of the dictionary attribute.
*
* Parameter default_values_column provides the default values
* for keys that are not in the dictionary. If a null pointer is passed,
* the default attribute value must be used instead.
*/
virtual ColumnPtr getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const = 0;
/** Subclass must validate the key columns and key types and return a ColumnUInt8
* that holds, for each row, a flag telling whether that row's key is in the dictionary:
* 1 if the key is in the dictionary, 0 otherwise.
*/
virtual ColumnUInt8::Ptr hasKeys(
const Columns & key_columns,
const DataTypes & key_types) const = 0;
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
bool supportUpdates() const override { return true; }
@ -115,7 +160,6 @@ protected:
const String full_name;
};
struct IDictionary : IDictionaryBase
{
IDictionary(const StorageID & dict_id_) : IDictionaryBase(dict_id_) {}
@ -124,8 +168,7 @@ struct IDictionary : IDictionaryBase
virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
virtual void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const = 0;
/// TODO: Rewrite
/// Methods for hierarchy.
virtual void isInVectorVector(
@ -157,14 +200,4 @@ struct IDictionary : IDictionaryBase
}
};
/// Implicit conversions in dictGet functions are disabled.
/// Throws an Exception with code TYPE_MISMATCH when `attribute_type` differs from the expected type `to`;
/// the message names the dictionary, the attribute, and both types. Does nothing when the types are equal.
inline void checkAttributeType(const IDictionaryBase * dictionary, const std::string & attribute_name,
AttributeUnderlyingType attribute_type, AttributeUnderlyingType to)
{
if (attribute_type != to)
throw Exception{ErrorCodes::TYPE_MISMATCH, "{}: type mismatch: attribute {} has type {}, expected {}",
dictionary->getDictionaryID().getNameForLogs(),
attribute_name, toString(attribute_type), toString(to)};
}
}

View File

@ -8,6 +8,7 @@
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteIntText.h>
#include <Poco/ByteOrder.h>
#include <Common/formatIPv6.h>
@ -16,6 +17,7 @@
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
namespace DB
{
@ -266,167 +268,75 @@ IPAddressDictionary::IPAddressDictionary(
calculateBytesAllocated();
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
ColumnPtr IPAddressDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
auto size = key_columns.front()->size();
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(attribute.null_values);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
key_columns,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
validateKeyTypes(key_types);
const auto first_column = key_columns.front();
const auto rows = first_column->size();
auto result = ColumnUInt8::create(rows);
auto& out = result->getData();
if (first_column->isNumeric())
{
uint8_t addrv6_buf[IPV6_BINARY_LENGTH];
@ -451,6 +361,8 @@ void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key
}
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
void IPAddressDictionary::createAttributes()
@ -652,6 +564,13 @@ void IPAddressDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = vec.size();
}
/// String specialization: accounts for the attribute as a StringRef attribute and then
/// adds the Arena object itself plus the size reported by the attribute's string arena.
template <>
void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    addAttributeSize<StringRef>(attribute);

    const auto arena_footprint = sizeof(Arena) + attribute.string_arena->size();
    bytes_allocated += arena_footprint;
}
void IPAddressDictionary::calculateBytesAllocated()
{
if (auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
@ -669,64 +588,18 @@ void IPAddressDictionary::calculateBytesAllocated()
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
@ -734,65 +607,27 @@ void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field
attribute.maps.emplace<ContainerType<T>>();
}
/// String specialization: stores the null value as a String (an empty one when the Field is Null),
/// then creates the StringRef container and a fresh arena for the attribute.
template <>
void IPAddressDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
    if (null_value.isNull())
        attribute.null_values = String();
    else
        attribute.null_values = null_value.get<String>();

    attribute.maps.emplace<ContainerType<StringRef>>();
    attribute.string_arena = std::make_unique<Arena>();
}
IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
switch (type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
attr.maps.emplace<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(type, type_call);
return attr;
}
@ -802,9 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
@ -841,7 +679,7 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
}
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
return;
}
@ -876,13 +714,16 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
mask_column[*found_it] == mask))
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPAddressDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
@ -891,7 +732,7 @@ void IPAddressDictionary::getItemsImpl(
if (unlikely(key_columns.size() == 2))
{
getItemsByTwoKeyColumnsImpl<AttributeType, OutputType>(
attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
attribute, key_columns, std::forward<ValueSetter>(set_value), default_value_extractor);
query_count.fetch_add(rows, std::memory_order_relaxed);
return;
}
@ -909,7 +750,7 @@ void IPAddressDictionary::getItemsImpl(
if (found != ipNotFound())
set_value(i, static_cast<OutputType>(vec[*found]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
else
@ -924,7 +765,7 @@ void IPAddressDictionary::getItemsImpl(
if (found != ipNotFound())
set_value(i, static_cast<OutputType>(vec[*found]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
@ -940,45 +781,24 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if constexpr (std::is_same_v<AttributeType, String>)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
return setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
}
}
else
{
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
const IPAddressDictionary::Attribute & IPAddressDictionary::getAttribute(const std::string & attribute_name) const
@ -1045,7 +865,7 @@ static auto keyViewGetter()
BlockInputStreamPtr IPAddressDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<IPAddressDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;

View File

@ -16,6 +16,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -61,91 +62,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -211,17 +137,23 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsByTwoKeyColumnsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const T value);

View File

@ -5,6 +5,8 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include <numeric>
@ -92,6 +94,61 @@ bool IPolygonDictionary::isInjective(const std::string &) const
return false;
}
/** Builds a column holding the value of attribute `attribute_name` for every key row.
  *
  * @param attribute_name         Name of the dictionary attribute to read.
  * @param result_type            Type requested by the caller; passed to dict_struct.getAttribute
  *                               (presumably validated there — confirm in DictionaryStructure).
  * @param key_columns            Key columns; the number of rows is taken from the first column.
  * @param default_values_column  Optional per-row defaults handed to DictionaryDefaultValueExtractor.
  *                               NOTE(review): a null pointer presumably means "use the attribute's
  *                               configured null value" — confirm in DictionaryHelpers.h.
  * @return The filled column; its concrete type is chosen by DictionaryAttributeColumnProvider.
  *
  * The key types argument is ignored.
  */
ColumnPtr IPolygonDictionary::getColumn(
    const std::string & attribute_name,
    const DataTypePtr & result_type,
    const Columns & key_columns,
    const DataTypes &,
    const ColumnPtr default_values_column) const
{
    ColumnPtr result;

    const auto index = getAttributeIndex(attribute_name);
    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);

    auto keys_size = key_columns.front()->size();

    /// Invoked by callOnDictionaryAttributeType with a tag object whose nested
    /// AttributeType names the attribute's underlying C++ type.
    auto type_call = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;
        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;

        const auto & null_value = std::get<AttributeType>(null_values[index]);
        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);

        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);

        if constexpr (std::is_same_v<AttributeType, String>)
        {
            /// Strings are appended row by row straight into the provider's column;
            /// no separate temporary ColumnString is needed.
            auto * out = column.get();

            getItemsImpl<String, StringRef>(
                index,
                key_columns,
                [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
                default_value_extractor);
        }
        else
        {
            /// Other types are written by row index into the column's data array.
            auto & out = column->getData();

            getItemsImpl<AttributeType, AttributeType>(
                index,
                key_columns,
                [&](const size_t row, const auto value) { return out[row] = value; },
                default_value_extractor);
        }

        result = std::move(column);
    };

    callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);

    return result;
}
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
{
// TODO: In order for this to work one would first have to support retrieving arrays from dictionaries.
@ -255,8 +312,12 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
return result;
}
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
auto& out = result->getData();
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
{
@ -266,6 +327,8 @@ void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, Pad
}
query_count.fetch_add(row, std::memory_order_relaxed);
return result;
}
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
@ -276,152 +339,12 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
return it->second;
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(null_values[ind]); \
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(null_values[ind])};
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const ColumnString * const def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const String & def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPolygonDictionary::getItemsImpl(
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto points = extractPoints(key_columns);
@ -437,7 +360,7 @@ void IPolygonDictionary::getItemsImpl(
id = ids[id];
if (!found)
{
set_value(i, static_cast<OutputType>(get_default(i)));
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
continue;
}
if constexpr (std::is_same<AttributeType, String>::value)

View File

@ -12,6 +12,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -78,101 +79,19 @@ public:
bool isInjective(const std::string & attribute_name) const override;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/** Returns a column with the values of the attribute `attribute_name`, one per key row.
  * `result_type` is the type requested by the caller; `default_values_column` optionally
  * supplies per-row defaults (NOTE(review): callers in this change also pass nullptr —
  * confirm the fallback behaviour in DictionaryHelpers.h).
  */
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
/** Returns a UInt8 column with one entry per row of the first key column.
  * NOTE(review): presumably each entry tells whether the corresponding point lies in
  * one of the dictionary's polygons, as the former `has` method did — confirm in the definition.
  */
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
/** Functions used to retrieve attributes of specific type by key. */
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
* The check is performed for multiple points represented by columns of their x and y coordinates.
* The boolean result is written to out.
*/
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
/** Single coordinate type. */
using Coord = Float32;
/** A two-dimensional point in Euclidean coordinates. */
@ -224,8 +143,12 @@ private:
void appendNullValue(AttributeUnderlyingType type, const Field & value);
/** Helper function for retrieving the value of an attribute by key. */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
std::map<std::string, size_t> attribute_index_by_name;

View File

@ -37,26 +37,6 @@ protected:
Block getBlock(size_t start, size_t length) const override;
private:
template <typename Type>
using DictionaryGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
template <typename Type>
using DictionaryDecimalGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
ColumnPtr getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
template <typename T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
return fillBlock(block_ids, block_start_dates, block_end_dates);
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename AttributeType, typename Getter>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{
if constexpr (IsDecimalNumber<AttributeType>)
{
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
return column;
}
else
{
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
return column_vector;
}
}
template <typename DictionaryType, typename RangeType, typename Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{
auto column_string = ColumnString::create();
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
return column_string;
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename T>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
return column_vector;
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename DictionarySpecialAttributeType, typename T>
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
auto ids_column = columns.back().column;
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
auto date_key = makeDateKey(block_start_dates, block_end_dates);
auto date_column = getColumnFromPODArray(date_key);
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute & attribute = structure.attributes[idx];
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::utUInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::utUInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::utUInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::utUInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::utUInt128:
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
break;
case AttributeUnderlyingType::utInt8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::utInt16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::utInt32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::utInt64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::utFloat32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::utFloat64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::utDecimal32:
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
break;
case AttributeUnderlyingType::utDecimal64:
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
break;
case AttributeUnderlyingType::utDecimal128:
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
break;
case AttributeUnderlyingType::utString:
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
break;
}
#undef GET_COLUMN_FORM_ATTRIBUTE
ColumnPtr column = dictionary->getColumn(
attribute.name,
attribute.type,
{ids_column, date_column},
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
nullptr);
columns.emplace_back(column, attribute.type, attribute.name);
}
}

View File

@ -5,6 +5,8 @@
#include <ext/range.h>
#include "DictionaryFactory.h"
#include "RangeDictionaryBlockInputStream.h"
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypesDecimal.h>
namespace
{
@ -50,6 +52,7 @@ namespace ErrorCodes
extern const int DICTIONARY_IS_EMPTY;
extern const int TYPE_MISMATCH;
extern const int UNSUPPORTED_METHOD;
extern const int NOT_IMPLEMENTED;
}
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
@ -85,66 +88,101 @@ RangeHashedDictionary::RangeHashedDictionary(
calculateBytesAllocated();
}
#define DECLARE_MULTIPLE_GETTER(TYPE) \
void RangeHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<RangeStorageType> & dates, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
getItems<TYPE>(attribute, ids, dates, out); \
}
DECLARE_MULTIPLE_GETTER(UInt8)
DECLARE_MULTIPLE_GETTER(UInt16)
DECLARE_MULTIPLE_GETTER(UInt32)
DECLARE_MULTIPLE_GETTER(UInt64)
DECLARE_MULTIPLE_GETTER(UInt128)
DECLARE_MULTIPLE_GETTER(Int8)
DECLARE_MULTIPLE_GETTER(Int16)
DECLARE_MULTIPLE_GETTER(Int32)
DECLARE_MULTIPLE_GETTER(Int64)
DECLARE_MULTIPLE_GETTER(Float32)
DECLARE_MULTIPLE_GETTER(Float64)
DECLARE_MULTIPLE_GETTER(Decimal32)
DECLARE_MULTIPLE_GETTER(Decimal64)
DECLARE_MULTIPLE_GETTER(Decimal128)
#undef DECLARE_MULTIPLE_GETTER
void RangeHashedDictionary::getString(
ColumnPtr RangeHashedDictionary::getColumn(
const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
const auto & null_value = std::get<String>(attribute.null_values);
ColumnPtr result;
for (const auto i : ext::range(0, ids.size()))
const auto & attribute = getAttribute(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
auto keys_size = key_columns.front()->size();
/// Cast second column to storage type
Columns modified_key_columns = key_columns;
auto range_storage_column = key_columns[1];
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
const auto * it = attr.find(ids[i]);
if (it)
{
const auto date = dates[i];
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
return v.range.contains(date);
});
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
out->insertData(string_ref.data, string_ref.size);
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
modified_key_columns,
[&](const size_t row, const StringRef value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
out->insertData(null_value.data(), null_value.size());
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
modified_key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
return result;
}
/// Key-existence checks are not available for this dictionary: the call always throws
/// NOT_IMPLEMENTED. Both parameters are left unnamed because they are never read.
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns &, const DataTypes &) const
{
    /// The format string must contain a `{}` placeholder; without it the dictionary name
    /// passed as the format argument would never appear in the exception text.
    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
        "Method hasKeys is not supported by dictionary {}", getDictionaryID().getNameForLogs());
}
void RangeHashedDictionary::createAttributes()
{
@ -154,7 +192,7 @@ void RangeHashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
throw Exception{ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.",
@ -220,66 +258,27 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = map_ref->getBufferSizeInCells();
}
/// String specialization: the attribute is accounted for through the StringRef instantiation,
/// then the Arena object itself and the size reported by the attribute's string arena are added.
template <>
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    addAttributeSize<StringRef>(attribute);

    const auto arena_bytes = attribute.string_arena->size();
    bytes_allocated += sizeof(Arena) + arena_bytes;
}
void RangeHashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -290,125 +289,80 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
attribute.maps = std::make_unique<Collection<T>>();
}
RangeHashedDictionary::Attribute
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
attribute.maps = std::make_unique<Collection<StringRef>>();
}
switch (type)
RangeHashedDictionary::Attribute
RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, {}};
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
attr.maps = std::make_unique<Collection<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
/// Dispatches on the runtime type stored in `attribute.type` to the matching
/// getItemsImpl<TYPE, OutputType> instantiation, passing `ids`, `dates` and `out` through unchanged.
/// Throws LOGICAL_ERROR when the attribute type is none of the types listed below
/// (there is no branch for utString in this function).
template <typename OutputType>
void RangeHashedDictionary::getItems(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const
{
/// Empty leading branch so that every DISPATCH expansion can begin with `else if`.
if (false) {} // NOLINT
#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
DISPATCH(UInt8)
DISPATCH(UInt16)
DISPATCH(UInt32)
DISPATCH(UInt64)
DISPATCH(UInt128)
DISPATCH(Int8)
DISPATCH(Int16)
DISPATCH(Int32)
DISPATCH(Int64)
DISPATCH(Float32)
DISPATCH(Float64)
DISPATCH(Decimal32)
DISPATCH(Decimal64)
DISPATCH(Decimal128)
#undef DISPATCH
/// Final `else` of the chain: no DISPATCH branch matched.
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
}
template <typename AttributeType, typename OutputType>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void RangeHashedDictionary::getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
const auto null_value = std::get<AttributeType>(attribute.null_values);
PaddedPODArray<Key> key_backup_storage;
PaddedPODArray<RangeStorageType> range_backup_storage;
for (const auto i : ext::range(0, ids.size()))
const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
for (const auto row : ext::range(0, ids.size()))
{
const auto it = attr.find(ids[i]);
const auto it = attr.find(ids[row]);
if (it)
{
const auto date = dates[i];
const auto date = dates[row];
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
if (val_it != std::end(ranges_and_values))
{
auto& value = val_it->value;
if (value)
set_value(row, static_cast<OutputType>(*value), false); // NOLINT
else
set_value(row, default_value_extractor[row], true);
}
else
{
set_value(row, default_value_extractor[row], false);
}
}
else
{
out[i] = static_cast<OutputType>(null_value); // NOLINT
set_value(row, default_value_extractor[row], false);
}
}
@ -417,9 +371,32 @@ void RangeHashedDictionary::getItemsImpl(
template <typename T>
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value)
{
auto & map = *std::get<Ptr<T>>(attribute.maps);
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
Value<ValueType> value_to_insert;
if (attribute.is_nullable && value.isNull())
{
value_to_insert = { range, {} };
}
else
{
if constexpr (std::is_same_v<T, String>)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const StringRef string_ref{string_in_arena, string.size()};
value_to_insert = Value<ValueType>{ range, { string_ref }};
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
}
}
const auto it = map.find(id);
if (it)
@ -427,92 +404,28 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
auto & values = it->getMapped();
const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, Value<T>{range, value});
values.insert(insert_it, std::move(value_to_insert));
}
else
map.insert({id, Values<T>{Value<T>{range, value}}});
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
}
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt16:
setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt32:
setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt64:
setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt128:
setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
break;
case AttributeUnderlyingType::utInt8:
setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt16:
setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt32:
setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt64:
setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utFloat32:
setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
break;
case AttributeUnderlyingType::utFloat64:
setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
break;
case AttributeUnderlyingType::utDecimal64:
setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
break;
case AttributeUnderlyingType::utDecimal128:
setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
break;
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
};
case AttributeUnderlyingType::utString:
{
auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const StringRef string_ref{string_in_arena, string.size()};
auto * it = map.find(id);
if (it)
{
auto & values = it->getMapped();
const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, Value<StringRef>{range, string_ref});
}
else
map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
@ -541,55 +454,18 @@ void RangeHashedDictionary::getIdsAndDates(
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt16:
getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt32:
getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt64:
getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt128:
getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt8:
getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt16:
getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt32:
getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt64:
getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utFloat32:
getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utFloat64:
getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utString:
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utDecimal64:
getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utDecimal128:
getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
break;
}
if constexpr (std::is_same_v<AttributeType, String>)
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
else
getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
template <typename T, typename RangeType>

View File

@ -1,16 +1,18 @@
#pragma once
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <memory>
#include <variant>
#include <optional>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -52,38 +54,18 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
typedef Int64 RangeStorageType;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
#define DECLARE_MULTIPLE_GETTER(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<RangeStorageType> & dates, \
ResultArrayType<TYPE> & out) const;
DECLARE_MULTIPLE_GETTER(UInt8)
DECLARE_MULTIPLE_GETTER(UInt16)
DECLARE_MULTIPLE_GETTER(UInt32)
DECLARE_MULTIPLE_GETTER(UInt64)
DECLARE_MULTIPLE_GETTER(UInt128)
DECLARE_MULTIPLE_GETTER(Int8)
DECLARE_MULTIPLE_GETTER(Int16)
DECLARE_MULTIPLE_GETTER(Int32)
DECLARE_MULTIPLE_GETTER(Int64)
DECLARE_MULTIPLE_GETTER(Float32)
DECLARE_MULTIPLE_GETTER(Float64)
DECLARE_MULTIPLE_GETTER(Decimal32)
DECLARE_MULTIPLE_GETTER(Decimal64)
DECLARE_MULTIPLE_GETTER(Decimal128)
#undef DECLARE_MULTIPLE_GETTER
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
void getString(
const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const;
using RangeStorageType = Int64;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -101,7 +83,7 @@ private:
struct Value final
{
Range range;
T value;
std::optional<T> value;
};
template <typename T>
@ -111,10 +93,14 @@ private:
template <typename T>
using Ptr = std::unique_ptr<Collection<T>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
public:
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -130,7 +116,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
Ptr<UInt8>,
@ -162,30 +148,21 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename OutputType>
void getItems(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const;
template <typename AttributeType, typename OutputType>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
static void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;

View File

@ -22,7 +22,8 @@
#include <filesystem>
#include <city.h>
#include <fcntl.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace ProfileEvents
{
@ -445,7 +446,7 @@ void SSDCachePartition::flush()
template <typename Out, typename GetDefault>
void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const
{
auto set_value = [&](const size_t index, ReadBuffer & buf)
@ -456,7 +457,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr
if (metadata.expiresAt() > now)
{
if (metadata.isDefault())
out[index] = get_default(index);
out[index] = default_value_extractor[index];
else
{
ignoreFromBufferToAttributeIndex(attribute_index, buf);
@ -939,14 +940,14 @@ SSDCacheStorage::~SSDCacheStorage()
template <typename Out, typename GetDefault>
void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
GetDefault & get_default, std::chrono::system_clock::time_point now) const
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const
{
std::vector<bool> found(ids.size(), false);
{
std::shared_lock lock(rw_lock);
for (const auto & partition : partitions)
partition->getValue<Out>(attribute_index, ids, out, found, get_default, now);
partition->getValue<Out>(attribute_index, ids, out, found, default_value_extractor, now);
}
for (size_t i = 0; i < ids.size(); ++i)
@ -1327,102 +1328,62 @@ SSDCacheDictionary::SSDCacheDictionary(
createAttributes();
}
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
getItemsNumberImpl<TYPE, TYPE>(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
}
ColumnPtr SSDCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto keys_size = ids.size();
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>( \
index, \
ids, \
out, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>( \
index, \
ids, \
out, \
[&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
getItemsStringImpl(index, ids, column.get(), default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(index, ids, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
return result;
}
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void SSDCacheDictionary::getItemsNumberImpl(
const size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const size_t attribute_index,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter & default_value_extractor) const
{
const auto now = std::chrono::system_clock::now();
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, get_default, now);
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, default_value_extractor, now);
if (not_found_ids.empty())
return;
@ -1440,42 +1401,17 @@ void SSDCacheDictionary::getItemsNumberImpl(
[&](const size_t id)
{
for (const size_t row : not_found_ids[id])
out[row] = get_default(row);
out[row] = default_value_extractor[row];
},
getLifetime());
}
void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(null_values[index])};
getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; });
}
void SSDCacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); });
}
void SSDCacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; });
}
template <typename DefaultGetter>
void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
ColumnString * out, DefaultGetter && get_default) const
void SSDCacheDictionary::getItemsStringImpl(
const size_t attribute_index,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter & default_value_extractor) const
{
const auto now = std::chrono::system_clock::now();
@ -1494,7 +1430,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
{
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1525,7 +1461,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
const auto & id = ids[row];
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1539,20 +1475,30 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
}
else
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
}
}
}
void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr SSDCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
const auto rows = ext::size(ids);
for (const auto row : ext::range(0, rows))
out[row] = false;
const auto now = std::chrono::system_clock::now();
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
storage.has(ids, out, not_found_ids, now);
if (not_found_ids.empty())
return;
return result;
std::vector<Key> required_ids(not_found_ids.size());
std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; });
@ -1571,11 +1517,13 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
out[row] = false;
},
getLifetime());
return result;
}
BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<SSDCacheDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
}

View File

@ -2,11 +2,15 @@
#if defined(__linux__) || defined(__FreeBSD__)
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <chrono>
#include <list>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <Poco/Logger.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/ArenaWithFreeLists.h>
@ -16,12 +20,11 @@
#include <Core/Block.h>
#include <Dictionaries/BucketCache.h>
#include <IO/HashingWriteBuffer.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>
#include <shared_mutex>
#include <variant>
#include <vector>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -109,7 +112,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const;
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
@ -232,7 +235,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
GetDefault & get_default, std::chrono::system_clock::time_point now) const;
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const;
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map<Key, std::vector<size_t>> & not_found,
@ -351,77 +354,20 @@ public:
std::exception_ptr getLastException() const override { return storage.getLastException(); }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * def, ColumnString * out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -434,11 +380,17 @@ private:
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void getItemsNumberImpl(
size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
size_t attribute_index,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsStringImpl(size_t attribute_index, const PaddedPODArray<Key> & ids,
ColumnString * out, DefaultGetter && get_default) const;
void getItemsStringImpl(
size_t attribute_index,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter & default_value_extractor) const;
const std::string name;
const DictionaryStructure dict_struct;

View File

@ -9,6 +9,7 @@
#include <Common/ProfilingScopedRWLock.h>
#include <Common/MemorySanitizer.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <IO/AIO.h>
@ -23,7 +24,7 @@
#include <filesystem>
#include <city.h>
#include <fcntl.h>
#include <Functions/FunctionHelpers.h>
namespace ProfileEvents
{
@ -461,8 +462,12 @@ void SSDComplexKeyCachePartition::flush()
template <typename Out, typename GetDefault>
void SSDComplexKeyCachePartition::getValue(
const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
const size_t attribute_index,
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<Out> & out,
std::vector<bool> & found,
GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const
{
auto set_value = [&](const size_t index, ReadBuffer & buf)
@ -474,7 +479,7 @@ void SSDComplexKeyCachePartition::getValue(
if (metadata.expiresAt() > now)
{
if (metadata.isDefault())
out[index] = get_default(index);
out[index] = default_value_extractor[index];
else
{
ignoreFromBufferToAttributeIndex(attribute_index, buf);
@ -520,7 +525,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index,
getImpl(key_columns, key_types, set_value, found);
}
void SSDComplexKeyCachePartition::has(
void SSDComplexKeyCachePartition::hasKeys(
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::vector<bool> & found, std::chrono::system_clock::time_point now) const
{
@ -1018,7 +1023,7 @@ void SSDComplexKeyCacheStorage::getString(
hit_count.fetch_add(n - count_not_found, std::memory_order_release);
}
void SSDComplexKeyCacheStorage::has(
void SSDComplexKeyCacheStorage::hasKeys(
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const
@ -1031,7 +1036,7 @@ void SSDComplexKeyCacheStorage::has(
{
std::shared_lock lock(rw_lock);
for (const auto & partition : partitions)
partition->has(key_columns, key_types, out, found, now);
partition->hasKeys(key_columns, key_types, out, found, now);
}
size_t count_not_found = 0;
@ -1376,96 +1381,64 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
createAttributes();
}
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
}
ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
dict_struct.validateKeyTypes(key_types);
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
auto keys_size = key_columns.front()->size();
template <typename AttributeType, typename OutputType, typename DefaultGetter>
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsStringImpl(index, key_columns, key_types, out, default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(
index,
key_columns,
key_types,
out,
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
return result;
}
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const
{
assert(dict_struct.key);
assert(key_columns.size() == key_types.size());
@ -1476,7 +1449,7 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
TemporalComplexKeysPool not_found_pool;
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now);
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, default_value_extractor, now);
if (not_found_keys.empty())
return;
@ -1503,54 +1476,17 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
[&](const auto key)
{
for (const size_t row : not_found_keys[key])
out[row] = get_default(row);
out[row] = default_value_extractor[row];
},
getLifetime());
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(null_values[index])};
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); });
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
}
template <typename DefaultGetter>
void SSDComplexKeyCacheDictionary::getItemsStringImpl(
const size_t attribute_index,
const Columns & key_columns,
const DataTypes & key_types,
ColumnString * out,
DefaultGetter && get_default) const
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
dict_struct.validateKeyTypes(key_types);
@ -1576,7 +1512,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
{
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1619,7 +1555,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
SCOPE_EXIT(tmp_keys_pool.rollback(key));
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1633,26 +1569,31 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
}
else
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
}
}
}
void SSDComplexKeyCacheDictionary::has(
const Columns & key_columns,
const DataTypes & key_types,
PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
const auto rows_num = key_columns.front()->size();
auto result = ColumnUInt8::create(rows_num);
auto& out = result->getData();
for (const auto row : ext::range(0, rows_num))
out[row] = false;
const auto now = std::chrono::system_clock::now();
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
TemporalComplexKeysPool not_found_pool;
storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
storage.hasKeys(key_columns, key_types, out, not_found_keys, not_found_pool, now);
if (not_found_keys.empty())
return;
return result;
std::vector<KeyRef> required_keys(not_found_keys.size());
std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; });
@ -1681,6 +1622,8 @@ void SSDComplexKeyCacheDictionary::has(
out[row] = false;
},
getLifetime());
return result;
}
BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream(

View File

@ -2,11 +2,13 @@
#if defined(OS_LINUX) || defined(__FreeBSD__)
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <chrono>
#include <list>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <Poco/Logger.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
@ -19,13 +21,11 @@
#include <Dictionaries/BucketCache.h>
#include <ext/scope_guard.h>
#include <IO/HashingWriteBuffer.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>
#include <shared_mutex>
#include <variant>
#include <vector>
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -313,7 +313,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const;
void getString(const size_t attribute_index,
@ -321,7 +321,7 @@ public:
StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
void has(const Columns & key_columns, const DataTypes & key_types,
void hasKeys(const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<UInt8> & out, std::vector<bool> & found,
std::chrono::system_clock::time_point now) const;
@ -459,7 +459,7 @@ public:
TemporalComplexKeysPool & not_found_pool,
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
void hasKeys(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const;
@ -569,88 +569,20 @@ public:
std::exception_ptr getLastException() const override { return storage.getLastException(); }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// Returns a column holding the values of attribute `attribute_name` for the given keys.
/// The definition validates `key_types` against the dictionary structure, resolves the
/// attribute via dict_struct.getAttribute(attribute_name, result_type), and builds a
/// DictionaryDefaultValueExtractor from the attribute's null value and `default_values_column`;
/// that extractor supplies the value for rows that need a default.
/// NOTE(review): how `default_values_column` is interpreted (e.g. when it is null) is decided
/// inside DictionaryDefaultValueExtractor - confirm there.
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
/// Returns a UInt8 column with one entry per row of the first key column.
/// The definition validates `key_types`, initialises every entry to false and then
/// passes the column's data to the storage lookup to be filled in.
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -661,17 +593,20 @@ private:
AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
void createAttributes();
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsStringImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ColumnString * out, DefaultGetter && get_default) const;
const Columns & key_columns,
const DataTypes & key_types,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
const std::string name;
const DictionaryStructure dict_struct;

View File

@ -21,7 +21,7 @@
#include "registerDictionaries.h"
#if USE_ODBC
# include <Poco/Data/ODBC/Connector.h>
# include <Poco/Data/ODBC/Connector.h> // Y_IGNORE
#endif
namespace DB

View File

@ -6,31 +6,25 @@ LIBRARY()
PEERDIR(
clickhouse/src/Common
contrib/libs/poco/Data
contrib/libs/poco/Data/ODBC
contrib/libs/poco/MongoDB
contrib/libs/poco/Redis
contrib/libs/sparsehash
)
IF (USE_ODBC)
PEERDIR(contrib/libs/poco/Data/ODBC)
ENDIF ()
NO_COMPILER_WARNINGS()
SRCS(
CacheDictionary.cpp
CacheDictionary_generate1.cpp
CacheDictionary_generate2.cpp
CacheDictionary_generate3.cpp
CassandraBlockInputStream.cpp
CassandraDictionarySource.cpp
CassandraHelpers.cpp
ClickHouseDictionarySource.cpp
ComplexKeyCacheDictionary.cpp
ComplexKeyCacheDictionary_createAttributeWithType.cpp
ComplexKeyCacheDictionary_generate1.cpp
ComplexKeyCacheDictionary_generate2.cpp
ComplexKeyCacheDictionary_generate3.cpp
ComplexKeyCacheDictionary_setAttributeValue.cpp
ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
ComplexKeyDirectDictionary.cpp
ComplexKeyHashedDictionary.cpp
DictionaryBlockInputStreamBase.cpp

View File

@ -5,12 +5,15 @@ LIBRARY()
PEERDIR(
clickhouse/src/Common
contrib/libs/poco/Data
contrib/libs/poco/Data/ODBC
contrib/libs/poco/MongoDB
contrib/libs/poco/Redis
contrib/libs/sparsehash
)
IF (USE_ODBC)
PEERDIR(contrib/libs/poco/Data/ODBC)
ENDIF ()
NO_COMPILER_WARNINGS()

View File

@ -146,20 +146,32 @@ Block MySQLBlockInputStream::readImpl()
const auto value = row[position_mapping[index]];
const auto & sample = description.sample_block.getByPosition(index);
bool is_type_nullable = description.types[index].second;
if (!value.isNull())
{
if (description.types[index].second)
if (is_type_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
column_nullable.getNullMapData().emplace_back(0);
column_nullable.getNullMapData().emplace_back(false);
}
else
{
insertValue(*sample.type, *columns[index], description.types[index].first, value);
}
}
else
{
insertDefaultValue(*columns[index], *sample.column);
if (is_type_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
column_nullable.getNullMapData().back() = true;
}
}
}
++num_rows;

View File

@ -504,7 +504,7 @@ private:
using namespace traits_;
using namespace impl_;
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmetic : public IFunction
{
static constexpr const bool is_plus = IsOperation<Op>::plus;
@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
>(type, std::forward<F>(f));
}
/// Type dispatch whose candidate list contains no DataTypeFloat32 / DataTypeFloat64 entry.
/// castBothTypes uses it for both arguments when valid_on_float_arguments is false.
/// Forwards `type` and `f` to castTypeToEither and returns its result.
/// NOTE(review): presumably a type outside this list (e.g. a float type) makes castTypeToEither
/// return false without calling `f` - confirm against castTypeToEither.
template <typename F>
static bool castTypeNoFloats(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeUInt8,
DataTypeUInt16,
DataTypeUInt32,
DataTypeUInt64,
DataTypeUInt256,
DataTypeInt8,
DataTypeInt16,
DataTypeInt32,
DataTypeInt64,
DataTypeInt128,
DataTypeInt256,
DataTypeDate,
DataTypeDateTime,
DataTypeDecimal<Decimal32>,
DataTypeDecimal<Decimal64>,
DataTypeDecimal<Decimal128>,
DataTypeDecimal<Decimal256>,
DataTypeFixedString
>(type, std::forward<F>(f));
}
template <typename F>
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
{
return castType(left, [&](const auto & left_)
if constexpr (valid_on_float_arguments)
{
return castType(right, [&](const auto & right_)
return castType(left, [&](const auto & left_)
{
return f(left_, right_);
return castType(right, [&](const auto & right_)
{
return f(left_, right_);
});
});
});
}
else
{
return castTypeNoFloats(left, [&](const auto & left_)
{
return castTypeNoFloats(right, [&](const auto & right_)
{
return f(left_, right_);
});
});
}
}
static FunctionOverloadResolverPtr
@ -1319,11 +1357,11 @@ public:
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
{
public:
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
using Monotonicity = typename Base::Monotonicity;
static FunctionPtr create(
@ -1488,7 +1526,7 @@ private:
DataTypePtr return_type;
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
{
public:
@ -1512,14 +1550,14 @@ public:
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
{
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
arguments[0], arguments[1], return_type, context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
@ -1530,7 +1568,7 @@ public:
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
}
private:

View File

@ -212,4 +212,12 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
return {nested_columns, offsets->data()};
}
/// Two data types are treated as equal when getName() yields the same name for both.
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
{
    /// Take the left name first, then compare it with the right one.
    const auto & left_name = lhs->getName();
    return left_name == rhs->getName();
}
}

View File

@ -152,4 +152,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/// Check if two types are equal.
/// The definition compares the names returned by getName() for each type.
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs);
}

View File

@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
factory.registerFunction<FunctionDictGetUUIDOrDefault>();
factory.registerFunction<FunctionDictGetStringOrDefault>();
factory.registerFunction<FunctionDictGetNoType>();
factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
}
}

File diff suppressed because it is too large Load Diff

View File

@ -101,7 +101,8 @@ struct IntegerRoundingComputation
return scale;
}
static ALWAYS_INLINE T computeImpl(T x, T scale)
/// Integer overflow is Ok.
static ALWAYS_INLINE_NO_SANITIZE_UNDEFINED T computeImpl(T x, T scale)
{
switch (rounding_mode)
{

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
}
template <class T>
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
template <typename A, typename B, typename Impl, typename Name>
struct GCDLCMImpl
@ -33,7 +33,7 @@ struct GCDLCMImpl
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
apply(A, B)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
}
template <typename Result = ResultType>

View File

@ -353,6 +353,9 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
keys_bytes += key_sizes[j];
}
if (keys_bytes > 16)
return false;
executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
return true;
}

View File

@ -37,7 +37,7 @@ struct BitAndImpl
};
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitOrImpl
};
struct NameBitOr { static constexpr auto name = "bitOr"; };
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
}

View File

@ -43,7 +43,7 @@ struct BitRotateLeftImpl
};
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitRotateRightImpl
};
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftLeftImpl
};
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftRightImpl
};
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
}

View File

@ -34,7 +34,7 @@ struct BitTestImpl
};
struct NameBitTest { static constexpr auto name = "bitTest"; };
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitXorImpl
};
struct NameBitXor { static constexpr auto name = "bitXor"; };
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
}

View File

@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
}
};
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
}

View File

@ -95,7 +95,7 @@ void geodistInit()
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
sphere_metric_lut[i] = cosf(latitude);
sphere_metric_lut[i] = sqrf(cosf(latitude));
}
}
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
/// (Remember how a plane flies from Moscow to New York)
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".

View File

@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
}
};
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
}

View File

@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
boost::algorithm::to_lower(matched_region);
region = matched_region;
}
else
{
region = Aws::Region::AWS_GLOBAL;
}
}
}

View File

@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
return false;
}
bool ActionsDAG::empty() const
bool ActionsDAG::trivial() const
{
for (const auto & node : nodes)
if (node.type != ActionType::INPUT)
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
return false;
return true;

View File

@ -223,7 +223,7 @@ public:
bool hasArrayJoin() const;
bool hasStatefulFunctions() const;
bool empty() const; /// If actions only contain inputs.
bool trivial() const; /// If actions has no functions or array join.
const ActionsSettings & getSettings() const { return settings; }

View File

@ -50,7 +50,6 @@
#include <Interpreters/SystemLog.h>
#include <Interpreters/Context.h>
#include <Interpreters/DDLWorker.h>
#include <Common/DNSResolver.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/UncompressedCache.h>
#include <Parsers/ASTCreateQuery.h>

View File

@ -51,7 +51,6 @@
#include <Interpreters/Context.h>
#include <Common/ProfileEvents.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Common/SensitiveDataMasker.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>

View File

@ -0,0 +1,56 @@
#pragma once
#include <Processors/QueryPlan/QueryPlan.h>
#include <array>
namespace DB
{
namespace QueryPlanOptimizations
{
/// This is the main function which optimizes the whole QueryPlan tree.
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Optimization is a function applied to a QueryPlan::Node.
/// It can read and update the subtree of the specified node.
/// It returns the number of updated layers of the subtree if some change happened, and 0 otherwise.
/// It must guarantee that the structure of the tree stays correct.
///
/// New nodes should be added to the QueryPlan::Nodes list.
/// There is no need to remove old nodes from the list.
struct Optimization
{
/// Signature shared by all optimizations: (node to optimize, list that receives newly created nodes) -> updated depth.
using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
/// The optimization itself. optimizeTree skips entries where this is nullptr.
const Function apply = nullptr;
/// Name of the optimization. NOTE(review): unlike `apply`, it has no default initializer.
const char * name;
};
/// Move ARRAY JOIN up if possible.
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
/// Move LimitStep down if possible.
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
inline const auto & getOptimizations()
{
static const std::array<Optimization, 4> optimizations =
{{
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
{tryPushDownLimit, "pushDownLimit"},
{trySplitFilter, "splitFilter"},
{tryMergeExpressions, "mergeExpressions"},
}};
return optimizations;
}
}
}

View File

@ -0,0 +1,85 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoinAction.h>
namespace DB::QueryPlanOptimizations
{
/// Move ARRAY JOIN up if possible.
///
/// Handles the chain `Expression/Filter -> ArrayJoin` (parent -> its only child).
/// The parent's actions are split by splitActionsBeforeArrayJoin into a part that is placed
/// below the ArrayJoin step (`first`) and a part that stays above it (`second`).
///
/// Returns 0 if nothing was changed, 2 if the two steps were swapped,
/// 3 if a new node was inserted below ArrayJoin (the new node is appended to `nodes`).
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
/// Only a node with exactly one child is considered.
if (parent_node->children.size() != 1)
return 0;
QueryPlan::Node * child_node = parent_node->children.front();
/// References to the owning step pointers: assigning to them replaces the step stored in the node.
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
/// The parent must be Expression or Filter and the child must be ArrayJoin.
if (!(expression_step || filter_step) || !array_join_step)
return 0;
const auto & array_join = array_join_step->arrayJoin();
const auto & expression = expression_step ? expression_step->getExpression()
: filter_step->getExpression();
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
/// No actions can be moved before ARRAY JOIN.
if (split_actions.first->trivial())
return 0;
/// Copy the description now: the parent step is replaced or moved below.
auto description = parent->getStepDescription();
/// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
if (split_actions.second->trivial())
{
auto expected_header = parent->getOutputStream().header;
/// Expression/Filter -> ArrayJoin
/// Only ownership is exchanged here; the raw step pointers obtained above are still used below
/// (presumably the step objects themselves do not move — QueryPlanStepPtr looks like a unique_ptr).
std::swap(parent, child);
/// ArrayJoin -> Expression/Filter
/// The lower step is rebuilt on top of the stream produced by the node below it.
if (expression_step)
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
else
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
child->setStepDescription(std::move(description));
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
return 2;
}
/// Add new expression step before ARRAY JOIN.
/// Expression/Filter -> ArrayJoin -> Something
auto & node = nodes.emplace_back();
/// The new node takes over ArrayJoin's children and becomes ArrayJoin's only child.
node.children.swap(child_node->children);
child_node->children.emplace_back(&node);
/// Expression/Filter -> ArrayJoin -> node -> Something
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
node.step->setStepDescription(description);
array_join_step->updateInputStream(node.step->getOutputStream(), {});
/// Rebuild the parent from the remaining actions; the old parent step is destroyed by the assignment,
/// after the constructor arguments (which read from it) have been evaluated.
if (expression_step)
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
else
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
parent->setStepDescription(description + " [split]");
return 3;
}
}

View File

@ -0,0 +1,114 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/FinishSortingStep.h>
#include <Processors/QueryPlan/MergeSortingStep.h>
#include <Processors/QueryPlan/PartialSortingStep.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
{
/// For a plan of the form Limit -> Sorting, pass the limit into the sorting step of `node`.
/// Returns true if the step of `node` is one of the sorting steps (its limit was updated).
/// Nothing is done for a zero limit.
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
{
    if (limit == 0)
        return false;

    auto * raw_step = node->step.get();

    /// A chain PartialSorting -> MergeSorting -> MergingSorted is common,
    /// so for the upper steps of such a chain the first child is visited as well.
    auto descend_into_first_child = [&]
    {
        if (QueryPlan::Node * next = node->children.front())
            tryUpdateLimitForSortingSteps(next, limit);
    };

    if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(raw_step))
    {
        /// TODO: remove LimitStep here.
        merging_sorted->updateLimit(limit);
        descend_into_first_child();
        return true;
    }

    if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(raw_step))
    {
        /// TODO: remove LimitStep here.
        finish_sorting->updateLimit(limit);
        return true;
    }

    if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(raw_step))
    {
        merge_sorting->updateLimit(limit);
        descend_into_first_child();
        return true;
    }

    if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(raw_step))
    {
        partial_sorting->updateLimit(limit);
        return true;
    }

    return false;
}
/// Move LimitStep below its only child when that does not change the result.
/// Returns 2 if the Limit step and its child were swapped, 0 otherwise
/// (including the case when the limit was only passed into sorting steps).
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    auto & children = parent_node->children;
    if (children.size() != 1)
        return 0;

    QueryPlan::Node * lower_node = children.front();
    auto & upper_step = parent_node->step;
    auto & lower_step = lower_node->step;

    /// The parent must be a LimitStep; LIMIT WITH TIES is skipped for now.
    auto * limit_step = typeid_cast<LimitStep *>(upper_step.get());
    if (!limit_step || limit_step->withTies())
        return 0;

    /// The child must be a transforming step, everything else is skipped.
    const auto * transforming_step = dynamic_cast<const ITransformingStep *>(lower_step.get());
    if (!transforming_step)
        return 0;

    /// Sorting steps are a special case: they receive the limit and the steps are not swapped.
    if (tryUpdateLimitForSortingSteps(lower_node, limit_step->getLimitForSorting()))
        return 0;

    /// TotalsHaving is a special case too: totals may be incorrect if the limit is pushed down.
    if (typeid_cast<const TotalsHavingStep *>(lower_step.get()))
        return 0;

    /// From here on the traits of the child decide whether the push down is possible.
    const auto & step_traits = transforming_step->getTransformTraits();
    const auto & stream_traits = transforming_step->getDataStreamTraits();

    /// The child must not change the number of rows.
    if (!step_traits.preserves_number_of_rows)
        return 0;

    /// The child must not be the step which sorted the data.
    const bool output_is_sorted = !lower_step->getOutputStream().sort_description.empty();
    if (output_is_sorted && !stream_traits.preserves_sorting)
        return 0;

    /// The limit is pushed down only if no stream property changes.
    /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
    if (!limit_step->getOutputStream().hasEqualPropertiesWith(transforming_step->getOutputStream()))
        return 0;

    /// Limit now reads what the child used to read; after that the two steps change places.
    limit_step->updateInputStream(transforming_step->getInputStreams().front());
    upper_step.swap(lower_step);

    return 2;
}
}

View File

@ -0,0 +1,65 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
namespace DB::QueryPlanOptimizations
{
/// Replace chain `ExpressionStep -> ExpressionStep` with a single ExpressionStep.
/// Replace chain `FilterStep -> ExpressionStep` with a single FilterStep.
///
/// Returns 1 if the parent step was replaced by the merged step, 0 if nothing was changed.
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    /// The return type is size_t (number of updated layers), so "nothing changed" is 0, not false.
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    auto & parent = parent_node->step;
    auto & child = child_node->step;

    /// Both supported chains have an ExpressionStep as the child.
    auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
    if (!child_expr)
        return 0;

    auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
    auto * parent_filter = typeid_cast<FilterStep *>(parent.get());

    if (parent_expr)
    {
        const auto & child_actions = child_expr->getExpression();
        const auto & parent_actions = parent_expr->getExpression();

        /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
        /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
        /// Such a query will return two zeroes if we combine actions together.
        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
            return 0;

        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));

        /// The merged step reads what the child used to read.
        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
        expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");

        /// Replace the parent step and take over the children of the child node.
        parent_node->step = std::move(expr);
        parent_node->children.swap(child_node->children);
        return 1;
    }

    if (parent_filter)
    {
        const auto & child_actions = child_expr->getExpression();
        const auto & parent_actions = parent_filter->getExpression();

        /// Same restriction as for the Expression -> Expression chain.
        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
            return 0;

        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));

        auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
                                                   parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
        filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");

        parent_node->step = std::move(filter);
        parent_node->children.swap(child_node->children);
        return 1;
    }

    return 0;
}
}

View File

@ -0,0 +1,75 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <stack>
namespace DB::QueryPlanOptimizations
{
/// Apply all optimizations from getOptimizations() to the tree rooted at `root`.
/// The tree is traversed with an explicit stack: children of a node are processed before the node itself.
/// When some optimization reports that it updated N layers, these N layers are traversed again.
/// New nodes created by optimizations are added to `nodes`.
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
{
const auto & optimizations = getOptimizations();
struct Frame
{
QueryPlan::Node * node;
/// If not zero, traverse only depth_limit layers of tree (if no other optimizations happen).
/// Otherwise, traverse all children.
size_t depth_limit = 0;
/// Next child to process.
size_t next_child = 0;
};
std::stack<Frame> stack;
stack.push(Frame{.node = &root});
while (!stack.empty())
{
/// std::stack uses std::deque by default, and pushing to a deque does not invalidate
/// references to existing elements, so `frame` can still be used after stack.push below.
auto & frame = stack.top();
/// If depth_limit == 0, then traverse without limit (first entrance).
/// If depth_limit > 1, then traverse children with (depth_limit - 1).
/// If depth_limit == 1, children are not visited at all.
if (frame.depth_limit != 1)
{
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
stack.push(Frame
{
.node = frame.node->children[frame.next_child],
.depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
});
++frame.next_child;
continue;
}
}
/// The largest number of updated layers reported by the optimizations for this node.
size_t max_update_depth = 0;
/// Apply all optimizations.
for (const auto & optimization : optimizations)
{
/// Just in case, skip optimization if it is not initialized.
if (!optimization.apply)
continue;
/// Try to apply optimization.
auto update_depth = optimization.apply(frame.node, nodes);
max_update_depth = std::max<size_t>(max_update_depth, update_depth);
}
/// Traverse `max_update_depth` layers of tree again.
/// The same frame stays on the stack and its children are revisited from the first one.
if (max_update_depth)
{
frame.depth_limit = max_update_depth;
frame.next_child = 0;
continue;
}
/// Nothing was applied.
stack.pop();
}
}
}

View File

@ -0,0 +1,50 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
namespace DB::QueryPlanOptimizations
{
/// Turn a FilterStep into the chain `ExpressionStep -> FilterStep`, so that the FilterStep holds a minimal number of nodes.
/// Returns 2 if the step was split and 0 if nothing was changed. The new FilterStep node is appended to `nodes`.
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
{
    auto * filter = typeid_cast<FilterStep *>(node->step.get());
    if (!filter)
        return 0;

    const auto & actions = filter->getExpression();

    /// Functions like runningDifference must not be separated from the filter.
    if (actions->hasStatefulFunctions())
        return 0;

    auto parts = actions->splitActionsForFilter(filter->getFilterColumnName());

    /// Nothing is left for a separate ExpressionStep.
    if (parts.second->trivial())
        return 0;

    if (filter->removesFilterColumn())
        parts.second->removeUnusedInput(filter->getFilterColumnName());

    auto original_description = filter->getStepDescription();

    /// The new node takes over the children of `node` and becomes its only child.
    auto & lower_node = nodes.emplace_back();
    node->children.swap(lower_node.children);
    node->children.push_back(&lower_node);

    /// The reduced filter goes into the new (lower) node ...
    lower_node.step = std::make_unique<FilterStep>(
        lower_node.children.at(0)->step->getOutputStream(),
        std::move(parts.first),
        filter->getFilterColumnName(),
        filter->removesFilterColumn());

    /// ... and the remaining actions replace the step of the current node.
    /// The old FilterStep is destroyed here, so `filter` must not be used below.
    node->step = std::make_unique<ExpressionStep>(lower_node.step->getOutputStream(), std::move(parts.second));

    node->step->setStepDescription(original_description);
    lower_node.step->setStepDescription("(" + original_description + ")[split]");

    return 2;
}
}

View File

@ -6,15 +6,7 @@
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoinAction.h>
#include <stack>
#include <Processors/QueryPlan/LimitStep.h>
#include "MergingSortedStep.h"
#include "FinishSortingStep.h"
#include "MergeSortingStep.h"
#include "PartialSortingStep.h"
#include "TotalsHavingStep.h"
#include "ExpressionStep.h"
#include "ArrayJoinStep.h"
#include "FilterStep.h"
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
namespace DB
{
@ -341,318 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
}
}
/// If plan looks like Limit -> Sorting, update limit for Sorting
bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
{
if (limit == 0)
return false;
QueryPlanStepPtr & step = node->step;
QueryPlan::Node * child = nullptr;
bool updated = false;
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
{
/// TODO: remove LimitStep here.
merging_sorted->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
{
/// TODO: remove LimitStep here.
finish_sorting->updateLimit(limit);
updated = true;
}
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
{
merge_sorting->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
{
partial_sorting->updateLimit(limit);
updated = true;
}
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
/// Try update limit for them also if possible.
if (child)
tryUpdateLimitForSortingSteps(child, limit);
return updated;
}
/// Move LimitStep down if possible.
static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
{
auto & child = child_node->step;
auto * limit = typeid_cast<LimitStep *>(parent.get());
if (!limit)
return;
/// Skip LIMIT WITH TIES by now.
if (limit->withTies())
return;
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
/// Skip everything which is not transform.
if (!transforming)
return;
/// Special cases for sorting steps.
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
return;
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
if (typeid_cast<const TotalsHavingStep *>(child.get()))
return;
/// Now we should decide if pushing down limit possible for this step.
const auto & transform_traits = transforming->getTransformTraits();
const auto & data_stream_traits = transforming->getDataStreamTraits();
/// Cannot push down if child changes the number of rows.
if (!transform_traits.preserves_number_of_rows)
return;
/// Cannot push down if data was sorted exactly by child stream.
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
return;
/// Now we push down limit only if it doesn't change any stream properties.
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
return;
/// Input stream for Limit have changed.
limit->updateInputStream(transforming->getInputStreams().front());
parent.swap(child);
}
/// Move ARRAY JOIN up if possible.
static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
{
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
if (!(expression_step || filter_step) || !array_join_step)
return;
const auto & array_join = array_join_step->arrayJoin();
const auto & expression = expression_step ? expression_step->getExpression()
: filter_step->getExpression();
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
/// No actions can be moved before ARRAY JOIN.
if (split_actions.first->empty())
return;
auto description = parent->getStepDescription();
/// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
if (split_actions.second->empty())
{
auto expected_header = parent->getOutputStream().header;
/// Expression/Filter -> ArrayJoin
std::swap(parent, child);
/// ArrayJoin -> Expression/Filter
if (expression_step)
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
else
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
child->setStepDescription(std::move(description));
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
return;
}
/// Add new expression step before ARRAY JOIN.
/// Expression/Filter -> ArrayJoin -> Something
auto & node = nodes.emplace_back();
node.children.swap(child_node->children);
child_node->children.emplace_back(&node);
/// Expression/Filter -> ArrayJoin -> node -> Something
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
node.step->setStepDescription(description);
array_join_step->updateInputStream(node.step->getOutputStream(), {});
if (expression_step)
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
else
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
parent->setStepDescription(description + " [split]");
}
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
{
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
if (parent_expr && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_expr->getExpression();
/// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
/// Such a query will return two zeroes if we combine actions together.
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return false;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(expr);
parent_node->children.swap(child_node->children);
return true;
}
else if (parent_filter && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_filter->getExpression();
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return false;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(filter);
parent_node->children.swap(child_node->children);
return true;
}
return false;
}
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
{
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
if (!filter_step)
return false;
const auto & expr = filter_step->getExpression();
/// Do not split if there are function like runningDifference.
if (expr->hasStatefulFunctions())
return false;
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
if (split.second->empty())
return false;
if (filter_step->removesFilterColumn())
split.second->removeUnusedInput(filter_step->getFilterColumnName());
auto description = filter_step->getStepDescription();
auto & filter_node = nodes.emplace_back();
node->children.swap(filter_node.children);
node->children.push_back(&filter_node);
filter_node.step = std::make_unique<FilterStep>(
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
filter_node.step->setStepDescription("(" + description + ")[split]");
node->step->setStepDescription(description);
return true;
}
void QueryPlan::optimize()
{
/* Stack contains info for every nodes in the path from tree root to the current node.
* Every optimization changes only current node and it's children.
* Optimization may change QueryPlanStep, but not QueryPlan::Node (only add a new one).
* So, QueryPlan::Node::children will be always valid.
*/
struct Frame
{
Node * node;
size_t next_child = 0;
};
std::stack<Frame> stack;
stack.push(Frame{.node = root});
while (!stack.empty())
{
auto & frame = stack.top();
if (frame.next_child == 0)
{
if (frame.node->children.size() == 1)
{
tryPushDownLimit(frame.node->step, frame.node->children.front());
while (tryMergeExpressions(frame.node, frame.node->children.front()));
if (frame.node->children.size() == 1)
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
trySplitFilter(frame.node, nodes);
}
}
if (frame.next_child < frame.node->children.size())
{
stack.push(Frame{frame.node->children[frame.next_child]});
++frame.next_child;
}
else
{
if (frame.node->children.size() == 1)
{
while (tryMergeExpressions(frame.node, frame.node->children.front()));
trySplitFilter(frame.node, nodes);
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
}
stack.pop();
}
}
QueryPlanOptimizations::optimizeTree(*root, nodes);
}
}

View File

@ -116,6 +116,11 @@ SRCS(
QueryPlan/MergingFinal.cpp
QueryPlan/MergingSortedStep.cpp
QueryPlan/OffsetStep.cpp
QueryPlan/Optimizations/liftUpArrayJoin.cpp
QueryPlan/Optimizations/limitPushDown.cpp
QueryPlan/Optimizations/mergeExpressions.cpp
QueryPlan/Optimizations/optimizeTree.cpp
QueryPlan/Optimizations/splitFilter.cpp
QueryPlan/PartialSortingStep.cpp
QueryPlan/QueryPlan.cpp
QueryPlan/ReadFromPreparedSource.cpp

View File

@ -414,18 +414,19 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
end -= 64;
const auto * pos = end;
UInt64 val =
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
zero16))) << 16u)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
zero16))) << 32u)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
zero16))) << 48u);
val = ~val;
if (val == 0)
count += 64;
else

Some files were not shown because too many files have changed in this diff Show More