Merge branch 'master' into clickhouse-test-log-comment

This commit is contained in:
Alexey Milovidov 2021-01-31 01:38:27 +03:00
commit a923b94bdb
144 changed files with 4466 additions and 7579 deletions

2
.gitmodules vendored
View File

@ -184,7 +184,7 @@
url = https://github.com/ClickHouse-Extras/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
url = https://github.com/ClickHouse-Extras/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring

View File

@ -84,10 +84,12 @@
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS
# define NO_SANITIZE_THREAD
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
#endif
/// A template function for suppressing warnings about unused variables or function results.

View File

@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
int sched_getcpu(void)
{
int r;
unsigned cpu;
unsigned cpu = 0;
#ifdef VDSO_GETCPU_SYM
getcpu_f f = (getcpu_f)vdso_func;

View File

@ -62,6 +62,7 @@ RUN python3 -m pip install \
avro \
cassandra-driver \
confluent-kafka \
dict2xml \
dicttoxml \
docker \
docker-compose==1.22.0 \

View File

@ -53,10 +53,12 @@ function run_tests()
if [ "$NUM_TRIES" -gt "1" ]; then
ADDITIONAL_OPTIONS+=('--skip')
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('4')
fi
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
--test-runs "$NUM_TRIES" --jobs 4 \
--test-runs "$NUM_TRIES" \
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt

View File

@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
`EmbeddedRocksDB` lets you:
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `primary_key_name` any column name in the column list.
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
Example:
@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Description {#description}
- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->

View File

@ -428,7 +428,7 @@ Possible values:
- `'basic'` — Use basic parser.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
Default value: `'basic'`.
@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
Possible values:
- `'simple'` - Simple output format.
- `simple` - Simple output format.
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
ClickHouse outputs date and time in `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
- `'iso'` - ISO output format.
- `iso` - ISO output format.
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
ClickHouse outputs date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
- `'unix_timestamp'` - Unix timestamp output format.
- `unix_timestamp` - Unix timestamp output format.
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
ClickHouse outputs date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example, `1566285536`.
Default value: `'simple'`.
Default value: `simple`.
See also:

View File

@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — Length of the sliding window in seconds.
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional argument.
- `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can't exceed the Int64 maximum, which is 2^63 - 1).

View File

@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
Note that these functions work slowly.
Note that these functions work slowly in ClickHouse versions prior to 21.1.
## encrypt {#encrypt}
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@ -52,57 +52,38 @@ Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
`comment` String,
`secret` String
)
ENGINE = Memory
```
Insert this data:
Insert some data. Please avoid storing the keys/IVs in the database, as this undermines the whole concept of encryption. Storing 'hints' is also unsafe and is done here only for illustrative purposes:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Example with `-gcm`:
@ -110,40 +91,26 @@ Example with `-gcm`:
Query:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Example with `-gcm` mode and with `aad`:
Query:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
Supported encryption modes:
@ -156,7 +123,7 @@ Supported encryption modes:
**Syntax**
```sql
``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. Only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Insert this data:
Result:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
┌─ciphertexts_equal─┐
│ 1 │
└───────────────────┘
```
Example without `iv`:
But `encrypt` fails when `key` or `iv` is longer than expected:
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Result:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
Example with `iv`:
While `aes_encrypt_mysql` produces MySQL-compatible output:
Query:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Result:
```text
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Notice how supplying an even longer `IV` produces the same result:
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Result:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Which is binary equal to what MySQL produces on same inputs:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
## decrypt {#decrypt}
This function decrypts data using these modes:
This function decrypts ciphertext into a plaintext using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
@ -247,7 +234,7 @@ This function decrypts data using these modes:
**Syntax**
```sql
``` sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
**Examples**
Create this table:
Re-using table from [encrypt](./encryption-functions.md#encrypt).
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
``` text
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Now let's try to decrypt all that data.
Query:
``` sql
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
```
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ <20>4<EFBFBD>
<20>
│ aes-256-cfb128 with IV │ <20><><EFBFBD>6<EFBFBD>~ │
│aes-256-cbc no IV │ <20>2*4<>h3c<33>4w<34><77>@
└─────────────────────────────────────┴───────────┘
```
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
## aes_decrypt_mysql {#aes_decrypt_mysql}
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
Supported decryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
@ -321,7 +313,7 @@ Supported decryption modes:
**Syntax**
```sql
``` sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
**Examples**
Create this table:
Query:
Let's decrypt data we've previously encrypted with MySQL:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
```
Result:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
┌─plaintext─┐
│ Secret │
└───────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@ -0,0 +1,44 @@
---
toc_priority: 6
toc_title: EmbeddedRocksDB
---
# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = EmbeddedRocksDB
PRIMARY KEY(primary_key_name);
```
Обязательные параметры:
- `primary_key_name` может быть любое имя столбца из списка столбцов.
- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`.
- Поддерживается только один столбец в первичном ключе.
- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rocksdb` в соответствующем порядке.
- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
Пример:
``` sql
CREATE TABLE test
(
`key` String,
`v1` UInt32,
`v2` String,
`v3` Float32,
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key;
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) <!--hide-->

View File

@ -406,21 +406,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
Возможные значения:
- `'best_effort'` — включает расширенный парсинг.
- `best_effort` — включает расширенный парсинг.
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.
- `'basic'` — используется базовый парсер.
- `basic` — используется базовый парсер.
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.
Значение по умолчанию: `'basic'`.
Значение по умолчанию: `basic`.
См. также:
- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
## date_time_output_format {#settings-date_time_output_format}
Позволяет выбрать разные выходные форматы текстового представления даты и времени.
Возможные значения:
- `simple` - простой выходной формат.
Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
- `iso` - выходной формат ISO.
Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
- `unix_timestamp` - выходной формат Unix.
Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`.
Значение по умолчанию: `simple`.
См. также:
- [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
## join_default_strictness {#settings-join_default_strictness}
Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).

View File

@ -27,7 +27,7 @@ DateTime([timezone])
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime).
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format).

View File

@ -118,7 +118,7 @@ for (auto & stream : streams)
stream.second->finalize();
```
**18.** 行的尾不应该包含空格。
**18.** 行的尾不应该包含空格。
**19.** 源文件应该用 UTF-8 编码。

View File

@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR
### 列 TTL {#mergetree-column-ttl}
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期则ClickHouse 会从文件系统中的数据片段中此列。
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期则ClickHouse 会从文件系统中的数据片段中删除此列。
`TTL`子句不能被用于主键字段。

View File

@ -932,6 +932,10 @@ private:
std::cerr << "Received exception from server (version "
<< server_version << "):" << std::endl << "Code: "
<< server_exception->code() << ". " << text << std::endl;
if (is_interactive)
{
std::cerr << std::endl;
}
}
if (client_exception)
@ -939,6 +943,10 @@ private:
fmt::print(stderr,
"Error on processing query '{}':\n{}\n",
full_query, client_exception->message());
if (is_interactive)
{
fmt::print(stderr, "\n");
}
}
// A debug check -- at least some exception must be set, if the error

View File

@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
add_object_library(clickhouse_processors_merges Processors/Merges)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
set (DBMS_COMMON_LIBRARIES)
# libgcc_s does not provide an implementation of an atomics library. Instead,

View File

@ -0,0 +1,2 @@
# Standalone `test-connect` tool, built from test_connect.cpp and linked against `dbms`.
add_executable(test-connect test_connect.cpp)
target_link_libraries (test-connect PRIVATE dbms)

View File

@ -0,0 +1,99 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <atomic>
#include <Poco/Net/StreamSocket.h>
#include <Common/Exception.h>
#include <Common/Stopwatch.h>
#include <IO/ReadHelpers.h>
/** In a loop it connects to the server and immediately breaks the connection.
* Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
* Long time ago this behavior caused a bug in the TCPServer implementation in the Poco library.
*/
/// Usage: test-connect [num_iterations [num_threads [host [port]]]]
///
/// Starts `num_threads` threads; each one performs up to `num_iterations` connect-and-abort
/// cycles against host:port. If a single cycle takes more than 0.1 seconds, the elapsed time
/// is printed and all threads are asked to stop.
int main(int argc, char ** argv)
try
{
    size_t num_iterations = 1;
    size_t num_threads = 1;
    std::string host = "localhost";
    uint16_t port = 9000;

    if (argc >= 2)
        num_iterations = DB::parse<size_t>(argv[1]);

    if (argc >= 3)
        num_threads = DB::parse<size_t>(argv[2]);

    if (argc >= 4)
        host = argv[3];

    if (argc >= 5)
        port = DB::parse<uint16_t>(argv[4]);

    /// Set by any thread that observes a slow connect; checked by all threads on every iteration.
    std::atomic_bool cancel{false};

    std::vector<std::thread> threads(num_threads);
    for (auto & thread : threads)
    {
        thread = std::thread([&]
        {
            for (size_t i = 0; i < num_iterations && !cancel.load(std::memory_order_relaxed); ++i)
            {
                std::cerr << ".";

                /// Declared outside of `try` so that the exception handler can release it.
                int fd = -1;

                /// Everything that may throw is inside `try`: an exception escaping
                /// the thread function would call std::terminate.
                try
                {
                    Poco::Net::SocketAddress address(host, port);

                    fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
                    if (fd < 0)
                        DB::throwFromErrno("Cannot create socket", 0);

                    /// l_onoff = 1 with l_linger = 0: see the comment above main() - close() must send RST, not FIN.
                    linger linger_value;
                    linger_value.l_onoff = 1;
                    linger_value.l_linger = 0;

                    if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
                        DB::throwFromErrno("Cannot set linger", 0);

                    Stopwatch watch;

                    int res = connect(fd, address.addr(), address.length());

                    /// close() may overwrite errno, so remember the value left by connect().
                    const int connect_errno = errno;

                    close(fd);
                    fd = -1;

                    if (res != 0 && connect_errno != EINPROGRESS && connect_errno != EWOULDBLOCK)
                    {
                        /// Presumably throwFromErrno reports the current errno - restore the one from connect().
                        errno = connect_errno;
                        DB::throwFromErrno("Cannot connect", 0);
                    }

                    if (watch.elapsedSeconds() > 0.1)
                    {
                        std::cerr << watch.elapsedSeconds() << "\n";
                        cancel = true;
                        break;
                    }
                }
                catch (const Poco::Exception & e)
                {
                    /// Do not leak the descriptor if the failure happened after socket() succeeded.
                    if (fd >= 0)
                        close(fd);

                    std::cerr << e.displayText() << "\n";
                }
            }
        });
    }

    for (auto & thread : threads)
        thread.join();

    std::cerr << "\n";
}
catch (const Poco::Exception & e)
{
    std::cerr << e.displayText() << "\n";
}

View File

@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{

View File

@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{

View File

@ -17,13 +17,17 @@ namespace DB
static UInt64 toBits64(const Int8 * bytes64)
{
static const __m128i zero16 = _mm_setzero_si128();
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
<< 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
<< 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
<< 48);
UInt64 res =
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
return ~res;
}
#endif
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
#endif
for (; pos < end; ++pos)
count += *pos > 0;
count += *pos != 0;
return count;
}
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
#endif
for (; pos < end; ++pos)
count += (*pos & ~*pos2) > 0;
count += (*pos & ~*pos2) != 0;
return count;
}
@ -232,9 +236,10 @@ namespace
while (filt_pos < filt_end_aligned)
{
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = ~mask;
if (mask == 0)
{

View File

@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
for (size_t offset = min_size; offset < max_size; offset += 16)
{
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
zero16));
mask = ~mask;
if (mask)
{

View File

@ -163,6 +163,7 @@ private:
friend class ActionsDAG;
};
using BlockPtr = std::shared_ptr<Block>;
using Blocks = std::vector<Block>;
using BlocksList = std::list<Block>;
using BlocksPtr = std::shared_ptr<Blocks>;

View File

@ -6,8 +6,10 @@
#include <Core/MySQL/PacketsProtocolText.h>
#include <Core/MySQL/PacketsReplication.h>
#include <Core/MySQL/MySQLReplication.h>
#include <Common/DNSResolver.h>
#include <Poco/String.h>
namespace DB
{
using namespace Generic;

View File

@ -7,7 +7,6 @@
#include <IO/WriteHelpers.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/StreamSocket.h>
#include <Common/DNSResolver.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Core/MySQL/IMySQLWritePacket.h>

View File

@ -139,6 +139,7 @@ class IColumn;
\
M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
\
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \

View File

@ -885,15 +885,17 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&keys_type))
type = nullable_type->getNestedType().get();
if (isString(type))
WhichDataType which(type);
if (which.isString())
return creator(static_cast<ColumnString *>(nullptr));
if (isFixedString(type))
else if (which.isFixedString())
return creator(static_cast<ColumnFixedString *>(nullptr));
if (typeid_cast<const DataTypeDate *>(type))
else if (which.isDate())
return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
if (typeid_cast<const DataTypeDateTime *>(type))
else if (which.isDateTime())
return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
if (isColumnedAsNumber(type))
else if (which.isInt() || which.isUInt() || which.isFloat())
{
MutableColumnUniquePtr column;
TypeListNativeNumbers::forEach(CreateColumnVector(column, *type, creator));

View File

@ -31,6 +31,7 @@ public:
bool canBeUsedInBitOperations() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool canBeInsideLowCardinality() const override { return false; }
bool canBePromoted() const override { return false; }
};

View File

@ -13,11 +13,13 @@
#include <IO/WriteBufferFromOStream.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/map.h>
#include <ext/chrono_io.h>
#include <Common/setThreadName.h>
#include "CacheDictionary.inc.h"
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
namespace ProfileEvents
{
@ -130,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
/// For every id in `ids`, writes the value of the hierarchical attribute into `out`,
/// using the attribute's UInt64 null value as the default.
/// NOTE(review): getItemsNumberImpl is invoked twice below, once with a lambda that returns the
/// null value and once with a DictionaryDefaultValueExtractor built from it. Both write to the
/// same `out`; confirm that only one of the two calls is meant to remain.
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
}
@ -249,34 +251,384 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
}
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
/// Builds a column holding the value of attribute `attribute_name` for every key in the first key column.
///  - result_type is passed to dict_struct.getAttribute to obtain the attribute description.
///  - key_columns: only key_columns.front() is read.
///  - The DataTypes argument (key types) is unnamed and unused here.
///  - default_values_column is handed to the default-value extractor together with the attribute's null value.
/// The concrete column type is selected by callOnDictionaryAttributeType from attribute.type.
/// NOTE(review): several statements below (marked) refer to names that are not declared in this
/// function (`ids`, `out`) or force the attribute to be a string; they look like leftovers of the
/// removed getString overload. Confirm before relying on this text as compilable code.
ColumnPtr CacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
/// getColumnVectorData returns a reference to the key data; backup_storage is passed for it to use as needed.
PaddedPODArray<Key> backup_storage;
const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
auto keys_size = keys.size();
auto & attribute = getAttribute(attribute_name);
/// NOTE(review): string-only type check, presumably residue of getString.
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
/// NOTE(review): string-only null value, presumably residue of getString.
const auto null_value = StringRef{std::get<String>(attribute.null_value)};
/// Invoked by callOnDictionaryAttributeType with a tag object whose type exposes AttributeType.
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
/// NOTE(review): `ids` and `out` are not declared at this point; presumably residue of getString.
getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
const auto & null_value = std::get<AttributeType>(attribute.null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
/// Strings are appended to the column; every other type is written into the column's data array.
if constexpr (std::is_same_v<AttributeType, String>)
{
getItemsString(attribute, keys, column.get(), default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
/// Fills out[row] with the value of `attribute` for every ids[row].
/// Every row is first set to default_value_extractor[row]. Rows whose key is cached and valid are then
/// overwritten from the cache, unless the cell is marked default. Missing or expired keys are pushed to
/// the update queue: asynchronously when nothing is missing and allow_read_expired_keys is set (the
/// expired cached value is used), otherwise synchronously, after which the rows of every key the
/// update found are overwritten with the fetched value.
/// Also updates the DictCacheKeys* profile events and the query/hit counters.
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void CacheDictionary::getItemsNumberImpl(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const
{
/// NOTE(review): the next two statements use `attribute_name`, which is not a parameter of this
/// function, and redeclare `attribute`; they look like residue of a removed getString overload.
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
/// First fill everything with default values
const auto rows = ext::size(ids);
for (const auto row : ext::range(0, rows))
out[row] = default_value_extractor[row];
/// NOTE(review): `def` is not declared here; presumably residue of a removed getString overload.
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
/// Maybe there are duplicate keys, so we remember their indices.
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
size_t cache_hit = 0;
size_t cache_not_found_count = 0;
size_t cache_expired_cound = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// Copies the cached value into the answer; a cell marked default keeps the default written above.
auto insert_to_answer_routine = [&](size_t row, size_t idx)
{
auto & cell = cells[idx];
if (!cell.isDefault())
out[row] = static_cast<OutputType>(attribute_array[idx]);
};
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
/** cell should be updated if either:
* 1. ids do not match,
* 2. cell has expired,
* 3. explicit defaults were specified and cell was set default. */
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
++cache_hit;
insert_to_answer_routine(row, cell_idx);
}
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
{
++cache_not_found_count;
cache_expired_or_not_found_ids[id].push_back(row);
}
else if (state == ResultState::FoundButExpired)
{
cache_expired_cound++;
cache_expired_or_not_found_ids[id].push_back(row);
/// The expired value is still used as the answer when reading expired keys is allowed.
if (allow_read_expired_keys)
insert_to_answer_routine(row, cell_idx);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
if (!cache_not_found_count)
{
/// Nothing to update - return
if (!cache_expired_cound)
return;
/// Update asynchronously only if allow_read_expired_keys is enabled.
if (allow_read_expired_keys)
{
std::vector<Key> required_expired_ids;
required_expired_ids.reserve(cache_expired_cound);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
/// request new values
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Nothing to do - return
return;
}
}
/// From this point we have to update all keys synchronously.
/// Either some keys were not found, or allow_read_expired_keys is disabled
/// and there are no not-found keys but some expired ones.
std::vector<Key> required_ids;
required_ids.reserve(cache_not_found_count + cache_expired_cound);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
/// Request new values
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
waitForCurrentUpdateFinish(update_unit_ptr);
/// Add updated keys to answer.
const size_t attribute_index = getAttributeIndex(attribute.name);
for (auto & [key, value] : update_unit_ptr->found_ids)
{
if (value.found)
{
/// NOTE(review): the fetched value is read as OutputType, while the cached path reads
/// AttributeType and casts; the two agree only when both types are the same.
for (const size_t row : cache_expired_or_not_found_ids[key])
out[row] = std::get<OutputType>(value.values[attribute_index]);
}
}
}
void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
/// Appends to `out` one string per row of `ids`: the value of `attribute` for that key, or
/// default_value_extractor[row] when the key has no value of its own.
/// Two passes are used:
///  1. Optimistic: under a read lock, copy values straight from the cache while every key is found and valid.
///  2. Pessimistic (entered as soon as one key is missing or expired): drop the partial output, copy the
///     usable cached strings into a local map, request missing/expired keys through the update queue and
///     rebuild the whole column from the local map, the update result and the defaults.
/// Also updates the DictCacheKeys* profile events and the query/hit counters.
void CacheDictionary::getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
/// NOTE(review): the next two statements use `attribute_name`, which is not a parameter of this
/// function, and redeclare `attribute`; they look like residue of a removed getString overload.
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto rows = ext::size(ids);
/// NOTE(review): `def` is not declared here; presumably residue of a removed getString overload.
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
/// Save on some allocations.
out->getOffsets().reserve(rows);
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
auto found_outdated_values = false;
/// Perform optimistic version, fallback to pessimistic if failed.
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// Fetch up-to-date values, discard on fail.
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
out->insertData(string_ref.data, string_ref.size);
}
else
{
found_outdated_values = true;
break;
}
}
}
/// Optimistic code completed successfully.
if (!found_outdated_values)
{
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows, std::memory_order_release);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
return;
}
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
out->getChars().resize_assume_reserved(0);
out->getOffsets().resize_assume_reserved(0);
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
/// we are going to store every string separately
std::unordered_map<Key, String> local_cache;
size_t cache_not_found_count = 0;
size_t cache_expired_count = 0;
size_t total_length = 0;
size_t cache_hit = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// Copies a usable cached string into local_cache so it can be read after the lock is released.
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
{
const auto & cell = cells[cell_idx];
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
/// Do not store default, but count it in total length.
if (!cell.isDefault())
local_cache[id] = String{string_ref};
total_length += string_ref.size + 1;
};
for (const auto row : ext::range(0, ids.size()))
{
const auto id = ids[row];
const auto [cell_idx, state] = findCellIdxForGet(id, now);
if (state == ResultState::FoundAndValid)
{
++cache_hit;
insert_value_routine(row, id, cell_idx);
}
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
{
++cache_not_found_count;
cache_expired_or_not_found_ids[id].push_back(row);
}
else if (state == ResultState::FoundButExpired)
{
++cache_expired_count;
cache_expired_or_not_found_ids[id].push_back(row);
/// The expired value is still usable as the answer when reading expired keys is allowed.
if (allow_read_expired_keys)
insert_value_routine(row, id, cell_idx);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows, std::memory_order_relaxed);
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
/// Async update of expired keys.
if (!cache_not_found_count)
{
if (allow_read_expired_keys && cache_expired_count)
{
std::vector<Key> required_expired_ids;
required_expired_ids.reserve(cache_expired_count);
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
/// The update is only queued here; it is not awaited.
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Insert all found keys and defaults to output array.
out->getChars().reserve(total_length);
for (const auto row : ext::range(0, ext::size(ids)))
{
const auto id = ids[row];
StringRef value;
/// Previously we stored found keys in map.
const auto it = local_cache.find(id);
if (it != local_cache.end())
value = StringRef(it->second);
else
value = default_value_extractor[row];
out->insertData(value.data, value.size);
}
/// Nothing else to do.
return;
}
}
/// We will request both cache_not_found_ids and cache_expired_ids sync.
std::vector<Key> required_ids;
required_ids.reserve(cache_not_found_count + cache_expired_count);
std::transform(
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
tryPushToUpdateQueueOrThrow(update_unit_ptr);
waitForCurrentUpdateFinish(update_unit_ptr);
const size_t attribute_index = getAttributeIndex(attribute.name);
/// Only calculate the total length.
for (auto & [key, value] : update_unit_ptr->found_ids)
{
if (value.found)
{
/// NOTE(review): declared with plain `auto`, so this copies the String although only its size is read.
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
}
else
{
for (const auto row : cache_expired_or_not_found_ids[key])
total_length += default_value_extractor[row].size + 1;
}
}
out->getChars().reserve(total_length);
for (const auto row : ext::range(0, ext::size(ids)))
{
const auto id = ids[row];
StringRef value;
/// We have two maps: found in cache and found in source.
const auto local_it = local_cache.find(id);
if (local_it != local_cache.end())
value = StringRef(local_it->second);
else
{
const auto found_it = update_unit_ptr->found_ids.find(id);
/// Previously we didn't store defaults in local cache.
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
value = std::get<String>(found_it->second.values[attribute_index]);
else
value = default_value_extractor[row];
}
out->insertData(value.data, value.size);
}
}
/// Merges several callables into a single object whose call operator is the
/// overload set of all of them (handy as a visitor for std::visit).
template <class... Callables>
struct Overloaded : Callables...
{
    using Callables::operator()...;
};
@ -375,8 +727,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
return oldest_id;
}
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
/// There are three types of ids.
/// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
/// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
@ -444,7 +802,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
{
/// Nothing to update - return;
if (!cache_expired_count)
return;
return result;
if (allow_read_expired_keys)
{
@ -458,7 +816,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
tryPushToUpdateQueueOrThrow(update_unit_ptr);
/// Update is async - no need to wait.
return;
return result;
}
}
@ -483,6 +841,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
for (const auto row : cache_expired_or_not_found_ids[key])
out[row] = true;
}
return result;
}
@ -707,7 +1067,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
/// Creates a block input stream over the ids returned by getCachedIds(), restricted to `column_names`
/// and given `max_block_size`. The stream is constructed with shared_from_this().
/// NOTE(review): BlockInputStreamType is declared twice with different template arguments
/// (<CacheDictionary, Key> and <Key>); confirm which alias is the intended one.
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
}

View File

@ -21,6 +21,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace CurrentMetrics
{
@ -119,77 +120,20 @@ public:
std::exception_ptr getLastException() const override;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
/// Returns a column with the value of attribute `attribute_name` for every key in `key_columns`.
/// `default_values_column` supplies per-row defaults for keys without a value
/// (presumably optional; the attribute's null value is passed alongside it in the definition — confirm).
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
/// Returns a UInt8 column with one entry per key in `key_columns`
/// (presumably nonzero where the key exists in the dictionary; confirm against the definition).
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -260,12 +204,18 @@ private:
/* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
void getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
PaddedPODArray<Key> getCachedIds() const;
@ -456,5 +406,6 @@ private:
mutable std::condition_variable is_update_finished;
std::atomic<bool> finished{false};
};
};
}

View File

@ -1,368 +0,0 @@
#pragma once
#include <stdexcept>
#include "CacheDictionary.h"
#include <Columns/ColumnsNumber.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/typeid_cast.h>
#include <DataStreams/IBlockInputStream.h>
#include <ext/chrono_io.h>
#include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
namespace ProfileEvents
{
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
}
namespace CurrentMetrics
{
extern const Metric DictCacheRequests;
}
namespace DB
{
namespace ErrorCodes
{
}
/** Fills `out[row]` with the value of `attribute` for every `ids[row]`.
  *
  * Every row is first set to `get_default(row)`. Rows whose key is cached and valid are then overwritten
  * from the cache, unless the cell is marked default. Missing or expired keys are pushed to the update queue:
  *  - asynchronously, when nothing is missing and allow_read_expired_keys is set
  *    (the expired cached value is used as the answer);
  *  - synchronously otherwise, after which the rows of every key the update found
  *    are overwritten with the fetched value.
  *
  * Also updates the DictCacheKeys* profile events and the query/hit counters.
  */
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumberImpl(
    Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
{
    /// First fill everything with default values.
    const auto rows = ext::size(ids);
    for (const auto row : ext::range(0, rows))
        out[row] = get_default(row);

    /// Maybe there are duplicate keys, so we remember their indices.
    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;

    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);

    size_t cache_hit = 0;
    size_t cache_not_found_count = 0;
    size_t cache_expired_count = 0;

    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        /// Copies the cached value into the answer; a cell marked default keeps the default written above.
        auto insert_to_answer_routine = [&](size_t row, size_t idx)
        {
            auto & cell = cells[idx];
            if (!cell.isDefault())
                out[row] = static_cast<OutputType>(attribute_array[idx]);
        };

        /// Fetch up-to-date values, decide which ones require update.
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];

            /** cell should be updated if either:
              * 1. ids do not match,
              * 2. cell has expired,
              * 3. explicit defaults were specified and cell was set default. */
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                ++cache_hit;
                insert_to_answer_routine(row, cell_idx);
            }
            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
            {
                ++cache_not_found_count;
                cache_expired_or_not_found_ids[id].push_back(row);
            }
            else if (state == ResultState::FoundButExpired)
            {
                ++cache_expired_count;
                cache_expired_or_not_found_ids[id].push_back(row);

                /// The expired value is still used as the answer when reading expired keys is allowed.
                if (allow_read_expired_keys)
                    insert_to_answer_routine(row, cell_idx);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - cache_not_found_count - cache_expired_count, std::memory_order_release);

    /// Collects the distinct keys that need an update; `size_hint` is only used to reserve memory.
    auto collect_required_ids = [&](size_t size_hint)
    {
        std::vector<Key> required;
        required.reserve(size_hint);
        std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
            std::back_inserter(required), [](auto & pair) { return pair.first; });
        return required;
    };

    if (!cache_not_found_count)
    {
        /// Nothing to update - return.
        if (!cache_expired_count)
            return;

        /// Update asynchronously only if allow_read_expired_keys is enabled.
        if (allow_read_expired_keys)
        {
            /// Request new values; the answer already holds the expired ones.
            auto update_unit_ptr = std::make_shared<UpdateUnit>(collect_required_ids(cache_expired_count));
            tryPushToUpdateQueueOrThrow(update_unit_ptr);
            return;
        }
    }

    /// From this point we have to update all keys synchronously.
    /// Either some keys were not found, or allow_read_expired_keys is disabled
    /// and there are no not-found keys but some expired ones.
    auto update_unit_ptr = std::make_shared<UpdateUnit>(collect_required_ids(cache_not_found_count + cache_expired_count));
    tryPushToUpdateQueueOrThrow(update_unit_ptr);
    waitForCurrentUpdateFinish(update_unit_ptr);

    /// Add updated keys to answer.
    const size_t attribute_index = getAttributeIndex(attribute.name);

    for (auto & [key, value] : update_unit_ptr->found_ids)
    {
        if (!value.found)
            continue;

        /// Look the key up without operator[] so that unknown keys do not grow the map.
        const auto rows_it = cache_expired_or_not_found_ids.find(key);
        if (rows_it == cache_expired_or_not_found_ids.end())
            continue;

        /// NOTE(review): the fetched value is read as OutputType, while the cached path reads
        /// AttributeType and casts; the two agree only when both types are the same.
        const auto & fetched = std::get<OutputType>(value.values[attribute_index]);
        for (const size_t row : rows_it->second)
            out[row] = fetched;
    }
}
/** Appends to `out` one string per row of `ids`: the value of `attribute` for that key,
  * or `get_default(row)` when the key has no value of its own.
  *
  * Two passes are used:
  *  1. Optimistic: under a read lock, copy values straight from the cache while every key is found and valid.
  *  2. Pessimistic (entered as soon as one key is missing or expired): drop the partial output, copy the
  *     usable cached strings into a local map, request missing/expired keys through the update queue and
  *     rebuild the whole column from the local map, the update result and the defaults.
  *
  * When nothing is missing, some keys are expired and allow_read_expired_keys is set, the update is only
  * queued (not awaited) and the expired cached values are used.
  *
  * Also updates the DictCacheKeys* profile events and the query/hit counters.
  */
template <typename DefaultGetter>
void CacheDictionary::getItemsString(
    Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
{
    const auto rows = ext::size(ids);

    /// Save on some allocations.
    out->getOffsets().reserve(rows);

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// Perform optimistic version, fallback to pessimistic if failed.
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        /// Fetch up-to-date values, discard on fail.
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
            else
            {
                found_outdated_values = true;
                break;
            }
        }
    }

    /// Optimistic code completed successfully.
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows, std::memory_order_relaxed);
        hit_count.fetch_add(rows, std::memory_order_release);
        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
        return;
    }

    /// Now onto the pessimistic one, discard possible partial results from the optimistic path.
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
    /// We are going to store every string separately, so it can be read after the lock is released.
    std::unordered_map<Key, String> local_cache;

    size_t cache_not_found_count = 0;
    size_t cache_expired_count = 0;

    size_t total_length = 0;
    size_t cache_hit = 0;

    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();

        /// `id` is taken as Key, the key type of local_cache, rather than as size_t.
        auto insert_value_routine = [&](size_t row, Key id, size_t cell_idx)
        {
            const auto & cell = cells[cell_idx];
            const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];

            /// Do not store default, but count it in total length.
            if (!cell.isDefault())
                local_cache[id] = String{string_ref};

            total_length += string_ref.size + 1;
        };

        for (const auto row : ext::range(0, ids.size()))
        {
            const auto id = ids[row];
            const auto [cell_idx, state] = findCellIdxForGet(id, now);

            if (state == ResultState::FoundAndValid)
            {
                ++cache_hit;
                insert_value_routine(row, id, cell_idx);
            }
            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
            {
                ++cache_not_found_count;
                cache_expired_or_not_found_ids[id].push_back(row);
            }
            else if (state == ResultState::FoundButExpired)
            {
                ++cache_expired_count;
                cache_expired_or_not_found_ids[id].push_back(row);

                /// The expired value is still usable as the answer when reading expired keys is allowed.
                if (allow_read_expired_keys)
                    insert_value_routine(row, id, cell_idx);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);

    /// Collects the distinct keys that need an update; `size_hint` is only used to reserve memory.
    auto collect_required_ids = [&](size_t size_hint)
    {
        std::vector<Key> required;
        required.reserve(size_hint);
        std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
            std::back_inserter(required), [](auto & pair) { return pair.first; });
        return required;
    };

    /// Async update of expired keys.
    if (!cache_not_found_count && allow_read_expired_keys && cache_expired_count)
    {
        /// The update is only queued here; it is not awaited.
        auto update_unit_ptr = std::make_shared<UpdateUnit>(collect_required_ids(cache_expired_count));
        tryPushToUpdateQueueOrThrow(update_unit_ptr);

        /// Insert all found keys and defaults to output array.
        out->getChars().reserve(total_length);

        for (const auto row : ext::range(0, ext::size(ids)))
        {
            const auto id = ids[row];
            StringRef value;

            /// Previously we stored found keys in map.
            const auto it = local_cache.find(id);
            if (it != local_cache.end())
                value = StringRef(it->second);
            else
                value = get_default(row);

            out->insertData(value.data, value.size);
        }

        /// Nothing else to do.
        return;
    }

    /// We will request both cache_not_found_ids and cache_expired_ids sync.
    auto update_unit_ptr = std::make_shared<UpdateUnit>(collect_required_ids(cache_not_found_count + cache_expired_count));
    tryPushToUpdateQueueOrThrow(update_unit_ptr);
    waitForCurrentUpdateFinish(update_unit_ptr);

    const size_t attribute_index = getAttributeIndex(attribute.name);

    /// Only calculate the total length.
    for (auto & [key, value] : update_unit_ptr->found_ids)
    {
        /// Look the key up without operator[] so that unknown keys do not grow the map.
        const auto rows_it = cache_expired_or_not_found_ids.find(key);
        if (rows_it == cache_expired_or_not_found_ids.end())
            continue;

        const auto & key_rows = rows_it->second;

        if (value.found)
        {
            /// Bound by reference: only the size is needed, so the string is not copied.
            const auto & found_value = std::get<String>(value.values[attribute_index]);
            total_length += (found_value.size() + 1) * key_rows.size();
        }
        else
        {
            for (const auto row : key_rows)
                total_length += get_default(row).size + 1;
        }
    }

    out->getChars().reserve(total_length);

    for (const auto row : ext::range(0, ext::size(ids)))
    {
        const auto id = ids[row];
        StringRef value;

        /// We have two maps: found in cache and found in source.
        const auto local_it = local_cache.find(id);
        if (local_it != local_cache.end())
            value = StringRef(local_it->second);
        else
        {
            const auto found_it = update_unit_ptr->found_ids.find(id);

            /// Previously we didn't store defaults in local cache.
            if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
                value = std::get<String>(found_it->second.values[attribute_index]);
            else
                value = get_default(row);
        }

        out->insertData(value.data, value.size);
    }
}
}

View File

@ -1,32 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates the definition of CacheDictionary::get<TYPE>(attribute_name, ids, out),
/// i.e. the overload that takes no explicit default value.
/// The generated body looks up the attribute by name, checks that its underlying type is ut<TYPE>,
/// copies the attribute's null_value and calls getItemsNumberImpl with a default getter that
/// returns that null_value for every row.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(attribute.null_value); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
}
/// One definition per numeric and decimal attribute type listed below.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The macro is only needed inside this translation unit.
#undef DEFINE
}

View File

@ -1,34 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates the definition of CacheDictionary::get<TYPE>(attribute_name, ids, def, out),
/// i.e. the overload that takes an array of default values.
/// The generated body looks up the attribute by name, checks that its underlying type is ut<TYPE>
/// and calls getItemsNumberImpl with a default getter that returns def[row] for the requested row.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
}
/// One definition per numeric and decimal attribute type listed below.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The macro is only needed inside this translation unit.
#undef DEFINE
}

View File

@ -1,31 +0,0 @@
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/CacheDictionary.inc.h>
namespace DB
{
/// Generates the definition of CacheDictionary::get<TYPE>(attribute_name, ids, def, out),
/// i.e. the overload that takes a single default value passed by value.
/// The generated body looks up the attribute by name, checks that its underlying type is ut<TYPE>
/// and calls getItemsNumberImpl with a default getter that returns `def` for every row.
#define DEFINE(TYPE) \
void CacheDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
}
/// One definition per numeric and decimal attribute type listed below.
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
/// The macro is only needed inside this translation unit.
#undef DEFINE
}

View File

@ -10,7 +10,8 @@
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace ProfileEvents
{
@ -70,48 +71,50 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
createAttributes();
}
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
}
void ComplexKeyCacheDictionary::getString(
ColumnPtr ComplexKeyCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
}
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
auto keys_size = key_columns.front()->size();
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(attribute.null_values);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsString(attribute, key_columns, out, default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
@ -158,15 +161,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
return {oldest_id, false, false};
}
void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
const auto rows_num = key_columns.front()->size();
auto result = ColumnUInt8::create(rows_num);
auto& out = result->getData();
for (const auto row : ext::range(0, rows_num))
out[row] = false;
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
MapType<std::vector<size_t>> outdated_keys;
const auto rows_num = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
Arena temporary_keys_pool;
@ -212,7 +221,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
if (outdated_keys.empty())
return;
return result;
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
@ -233,8 +242,395 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
for (const auto out_idx : outdated_keys[key])
out[out_idx] = false;
});
return result;
}
/// Fills `out` with the values of a non-string `attribute` for every row of `key_columns`.
///
/// Rows whose key has a valid cell are answered under a read lock: the stored value is used,
/// or `default_value_extractor[row]` when the cell is marked as default.
/// All other rows are grouped by key and resolved through `update`, which invokes one of the two
/// callbacks per requested key; the callbacks write the result for every row sharing that key.
///
/// `out` is indexed by row, so it must already hold at least as many elements as `key_columns` has rows.
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void ComplexKeyCacheDictionary::getItemsNumberImpl(
    Attribute & attribute,
    const Columns & key_columns,
    PaddedPODArray<OutputType> & out,
    DefaultValueExtractor & default_value_extractor) const
{
    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;

    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
    const auto rows_num = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;
    PODArray<StringRef> keys_array(rows_num);

    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            /// A row has to be requested from the source whenever the lookup does not report a valid cell.
            /// `outdated` tells an expired cell apart from a key that is not cached at all.
            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                /// A cell marked as default holds no real value: the caller-supplied default wins.
                out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hits per row, like query_count does. `outdated_keys.size()` is the number of distinct
    /// missed keys, so subtracting it from the row count overstated hits when a missed key repeated.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    if (outdated_keys.empty())
        return;

    /// One representative row per distinct missed key is enough to request it from the source.
    std::vector<size_t> required_rows(outdated_keys.size());
    std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
    {
        return pair.getMapped().front();
    });

    /// request new values
    update(
        key_columns,
        keys_array,
        required_rows,
        [&](const StringRef key, const size_t cell_idx)
        {
            for (const auto row : outdated_keys[key])
                out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
        },
        [&](const StringRef key, const size_t)
        {
            for (const auto row : outdated_keys[key])
                out[row] = default_value_extractor[row];
        });
}
/// Appends to `out` the values of a String `attribute` for every row of `key_columns`.
///
/// Two passes are possible:
///  - optimistic: under a read lock, values are appended straight to `out`; it succeeds only if every
///    row is served by a valid cell (a cell marked as default yields `default_value_extractor[row]`);
///  - pessimistic: entered as soon as one row has no valid cell. The partial output is discarded,
///    cached values are copied into a temporary arena, missing keys are requested through `update`,
///    and `out` is filled in row order afterwards.
void ComplexKeyCacheDictionary::getItemsString(
    Attribute & attribute,
    const Columns & key_columns,
    ColumnString * out,
    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
    const auto rows_num = key_columns.front()->size();
    /// save on some allocations
    out->getOffsets().reserve(rows_num);

    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// perform optimistic version, fallback to pessimistic if failed
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, discard on fail
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            /// The key is only needed for this lookup, so its pool memory is given back each iteration.
            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                found_outdated_values = true;
                break;
            }
            else
            {
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
        }
    }

    /// optimistic code completed successfully
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows_num, std::memory_order_relaxed);
        hit_count.fetch_add(rows_num, std::memory_order_release);
        return;
    }

    /// now onto the pessimistic one, discard possible partial results from the optimistic path
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;
    /// we are going to store every string separately
    MapType<StringRef> map;
    PODArray<StringRef> keys_array(rows_num);

    size_t total_length = 0;
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];

                /// Defaults are not stored in the map: rows absent from it fall back to the
                /// default extractor when the output is assembled below.
                if (!cell.isDefault())
                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);

                total_length += string_ref.size + 1;
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hits per row, like query_count does. `outdated_keys.size()` is the number of distinct
    /// missed keys, so subtracting it from the row count overstated hits when a missed key repeated.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    /// request new values
    if (!outdated_keys.empty())
    {
        /// One representative row per distinct missed key is enough to request it from the source.
        std::vector<size_t> required_rows(outdated_keys.size());
        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
        {
            return pair.getMapped().front();
        });

        update(
            key_columns,
            keys_array,
            required_rows,
            [&](const StringRef key, const size_t cell_idx)
            {
                const StringRef attribute_value = attribute_array[cell_idx];

                /// We must copy key and value to own memory, because it may be replaced with another
                /// in next iterations of inner loop of update.
                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);

                map[copied_key] = copied_value;
                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
            },
            [&](const StringRef key, const size_t)
            {
                for (const auto row : outdated_keys[key])
                    total_length += default_value_extractor[row].size + 1;
            });
    }

    /// total_length accumulated (size + 1) for every row above, so reserve the character buffer once.
    out->getChars().reserve(total_length);

    for (const auto row : ext::range(0, ext::size(keys_array)))
    {
        const StringRef key = keys_array[row];
        auto * const it = map.find(key);
        const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
        out->insertData(string_ref.data, string_ref.size);
    }
}
/// Requests the keys of the rows `in_requested_rows` from the source and stores the answer in the cache cells.
///
/// `in_key_columns` / `in_requested_rows` are passed to `source_ptr->loadKeys`; `in_keys[row]` is the
/// serialized key of each requested row.
/// `on_cell_updated(key, cell_idx)` is called for every row returned by the source, after the cell was written.
/// `on_key_not_found(key, cell_idx)` is called for every requested key the source did not return, after the
/// cell was filled with null values and marked as default.
/// The write lock is taken before loading and is held until the function returns, so both callbacks run under it.
template <typename PresentKeyHandler, typename AbsentKeyHandler>
void ComplexKeyCacheDictionary::update(
const Columns & in_key_columns,
const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated,
AbsentKeyHandler && on_key_not_found) const
{
/// Requested key -> "was returned by the source" flag; every key starts as not found.
MapType<bool> remaining_keys{in_requested_rows.size()};
for (const auto row : in_requested_rows)
remaining_keys.insert({in_keys[row], false});
/// Cell lifetime in seconds is drawn from [dict_lifetime.min_sec, dict_lifetime.max_sec].
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
{
/// Measures the whole source request; reported as DictCacheRequestTimeNs below.
Stopwatch watch;
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
stream->readPrefix();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
const auto attributes_size = attributes.size();
const auto now = std::chrono::system_clock::now();
while (const auto block = stream->read())
{
/// cache column pointers
/// The first keys_size columns of the block are read as key columns, the next attributes_size as attributes.
const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows();
for (const auto row : ext::range(0, rows_num))
{
auto key = allocKey(row, key_columns, keys);
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Write every attribute of this row into the chosen cell.
for (const auto attribute_idx : ext::range(0, attributes.size()))
{
const auto & attribute_column = *attribute_columns[attribute_idx];
auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, cell_idx, attribute_column[row]);
}
/// if cell id is zero and zero does not map to this cell, then the cell is unused
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// handle memory allocated for old key
if (key == cell.key)
{
/// The cell already holds an equal key: drop the freshly allocated copy and keep using the cell's one.
freeKey(key);
key = cell.key;
}
else
{
/// new key is different from the old one
if (cell.key.data)
freeKey(cell.key);
cell.key = key;
}
cell.hash = hash;
/// With a non-zero lifetime range the cell gets a random expiration time,
/// otherwise its expiration is set to the maximum representable time point.
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// inform caller
on_cell_updated(key, cell_idx);
/// mark corresponding id as found
remaining_keys[key] = true;
}
}
stream->readSuffix();
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
}
size_t found_num = 0;
size_t not_found_num = 0;
const auto now = std::chrono::system_clock::now();
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.getMapped())
{
++found_num;
continue;
}
++not_found_num;
auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Set null_value for each attribute
for (auto & attribute : attributes)
setDefaultAttributeValue(attribute, cell_idx);
/// Check if cell had not been occupied before and increment element counter if it hadn't
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// Reuse the cell's key when it is equal; otherwise replace it with a copy made by copyKey.
if (key == cell.key)
key = cell.key;
else
{
if (cell.key.data)
freeKey(cell.key);
/// copy key from temporary pool
key = copyKey(key);
cell.key = key;
}
cell.hash = hash;
/// Same expiration rule as for keys returned by the source.
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// Mark the cell as default after the expiration time was set.
cell.setDefault();
/// inform caller that the cell has not been found
on_key_not_found(key, cell_idx);
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
}
void ComplexKeyCacheDictionary::createAttributes()
{
const auto attributes_size = dict_struct.attributes.size();
@ -263,6 +659,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
return attributes[it->second];
}
/// Resets the value stored for cell `idx` of `attribute` to the attribute's null value.
/// For String attributes the previously stored buffer is returned to `string_arena`,
/// unless the cell already points at the null value itself.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
    auto assign_null_value = [&](const auto & type_tag)
    {
        using AttributeType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values);
        }
        else
        {
            const auto & null_string = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already referring to the null value: nothing to release or reassign.
            if (stored.data == null_string.data())
                return;

            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{null_string};
        }
    };

    callOnDictionaryAttributeType(attribute.type, assign_null_value);
}
/// Builds an Attribute of the given underlying `type`: stores `null_value` converted to the attribute
/// type, allocates a container of `size` elements for the cells and accounts for it in `bytes_allocated`.
/// String attributes keep StringRef elements and make sure `string_arena` exists.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    auto init_storage = [&](const auto & type_tag)
    {
        using AttributeType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            result.null_values = AttributeType(null_value.get<NearestFieldType<AttributeType>>()); /* NOLINT */
            result.arrays = std::make_unique<ContainerType<AttributeType>>(size); /* NOLINT */
            bytes_allocated += size * sizeof(AttributeType);
        }
        else
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// The arena is created lazily, on the first String attribute.
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
        }
    };

    callOnDictionaryAttributeType(type, init_storage);

    return result;
}
/// Stores `value` into cell `idx` of `attribute`.
/// Non-string values are assigned directly. String values are copied into `string_arena`;
/// the buffer previously held by the cell is freed first, and an empty string is stored as an empty StringRef.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
    auto store_value = [&](const auto & type_tag)
    {
        using AttributeType = typename std::decay_t<decltype(type_tag)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = value.get<NearestFieldType<AttributeType>>();
        }
        else
        {
            const auto & new_string = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_string = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool has_buffer_to_free = stored.data && stored.data != null_string.data();
            if (has_buffer_to_free)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto length = new_string.size();
            if (length == 0)
            {
                /// Nothing to allocate for an empty string.
                stored = {};
                return;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_string.data(), new_string.data() + length, buffer);
            stored = StringRef{buffer, length};
        }
    };

    callOnDictionaryAttributeType(attribute.type, store_value);
}
StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
{
if (key_size_is_fixed)
@ -388,7 +880,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
keys.push_back(cells[idx].key);
}
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
}

View File

@ -23,7 +23,7 @@
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <DataStreams/IBlockInputStream.h>
#include "DictionaryHelpers.h"
namespace ProfileEvents
{
@ -89,93 +89,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// In all functions below, key_columns must be full (non-constant) columns.
/// See the requirement in IDataType.h for text-serialization functions.
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -252,227 +175,18 @@ private:
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
{
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
MapType<std::vector<size_t>> outdated_keys;
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
Attribute & attribute,
const Columns & key_columns,
PaddedPODArray<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
const auto rows_num = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
Arena temporary_keys_pool;
PODArray<StringRef> keys_array(rows_num);
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
const auto now = std::chrono::system_clock::now();
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
/** cell should be updated if either:
* 1. keys (or hash) do not match,
* 2. cell has expired,
* 3. explicit defaults were specified and cell was set default. */
if (!find_result.valid)
{
outdated_keys[key].push_back(row);
if (find_result.outdated)
++cache_expired;
else
++cache_not_found;
}
else
{
++cache_hit;
const auto & cell_idx = find_result.cell_idx;
const auto & cell = cells[cell_idx];
out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
}
}
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
query_count.fetch_add(rows_num, std::memory_order_relaxed);
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
if (outdated_keys.empty())
return;
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const size_t cell_idx)
{
for (const auto row : outdated_keys[key])
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
},
[&](const StringRef key, const size_t)
{
for (const auto row : outdated_keys[key])
out[row] = get_default(row);
});
}
/// Fills `out` with the string value of `attribute` for every row of `key_columns`.
///
/// Works in two stages:
///  1. Optimistic: under the read lock, values are appended straight to `out`. The stage is
///     abandoned at the first key for which `findCellIdx` reports a non-valid result.
///  2. Pessimistic: the partial output is discarded, values of valid cells are copied aside,
///     every non-valid key is handed to `update()`, and `out` is then filled row by row.
///
/// `get_default(row)` supplies the value used for cells flagged as default and for keys that
/// `update()` reports as absent; its result is used interchangeably with a stored `StringRef`
/// (only `.data` and `.size` are read).
template <typename DefaultGetter>
void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
{
    const auto rows_num = key_columns.front()->size();
    /// save on some allocations
    out->getOffsets().reserve(rows_num);

    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// perform optimistic version, fallback to pessimistic if failed
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, discard on fail
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            /// The serialized key is only needed for this lookup, so give its memory back right away.
            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));

            const auto find_result = findCellIdx(key, now);
            if (!find_result.valid)
            {
                found_outdated_values = true;
                break;
            }
            else
            {
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
        }
    }

    /// optimistic code completed successfully
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows_num, std::memory_order_relaxed);
        hit_count.fetch_add(rows_num, std::memory_order_release);
        return;
    }

    /// now onto the pessimistic one, discard possible partial results from the optimistic path
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;
    /// we are going to store every string separately
    MapType<StringRef> map;
    PODArray<StringRef> keys_array(rows_num);

    size_t total_length = 0;
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        for (const auto row : ext::range(0, rows_num))
        {
            /// No rollback here: the serialized keys are kept in `keys_array` and used below.
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;

            const auto find_result = findCellIdx(key, now);
            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];

                /// Default cells are not put into `map`; the final loop falls back to `get_default` for them.
                if (!cell.isDefault())
                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);

                total_length += string_ref.size + 1;
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hit rows, not `rows_num - outdated_keys.size()`: `outdated_keys` has one entry per
    /// distinct key, so subtracting its size would report repeated missed keys as hits.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    /// request new values
    if (!outdated_keys.empty())
    {
        /// One representative row per distinct outdated key is enough to request it.
        std::vector<size_t> required_rows(outdated_keys.size());
        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
        {
            return pair.getMapped().front();
        });

        update(
            key_columns,
            keys_array,
            required_rows,
            [&](const StringRef key, const size_t cell_idx)
            {
                const StringRef attribute_value = attribute_array[cell_idx];

                /// We must copy key and value to own memory, because it may be replaced with another
                /// in next iterations of inner loop of update.
                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);

                map[copied_key] = copied_value;
                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
            },
            [&](const StringRef key, const size_t)
            {
                for (const auto row : outdated_keys[key])
                    total_length += get_default(row).size + 1;
            });
    }

    out->getChars().reserve(total_length);

    /// Emit rows in their original order; keys without an entry in `map` get the default.
    for (const auto row : ext::range(0, ext::size(keys_array)))
    {
        const StringRef key = keys_array[row];
        const auto it = map.find(key);
        const auto string_ref = it ? it->getMapped() : get_default(row);
        out->insertData(string_ref.data, string_ref.size);
    }
}
void getItemsString(
Attribute & attribute,
const Columns & key_columns,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
template <typename PresentKeyHandler, typename AbsentKeyHandler>
void update(
@ -480,152 +194,7 @@ private:
const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated,
AbsentKeyHandler && on_key_not_found) const
{
MapType<bool> remaining_keys{in_requested_rows.size()};
for (const auto row : in_requested_rows)
remaining_keys.insert({in_keys[row], false});
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
{
Stopwatch watch;
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
stream->readPrefix();
const auto keys_size = dict_struct.key->size();
StringRefs keys(keys_size);
const auto attributes_size = attributes.size();
const auto now = std::chrono::system_clock::now();
while (const auto block = stream->read())
{
/// cache column pointers
const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows();
for (const auto row : ext::range(0, rows_num))
{
auto key = allocKey(row, key_columns, keys);
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
for (const auto attribute_idx : ext::range(0, attributes.size()))
{
const auto & attribute_column = *attribute_columns[attribute_idx];
auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, cell_idx, attribute_column[row]);
}
/// if cell id is zero and zero does not map to this cell, then the cell is unused
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
/// handle memory allocated for old key
if (key == cell.key)
{
freeKey(key);
key = cell.key;
}
else
{
/// new key is different from the old one
if (cell.key.data)
freeKey(cell.key);
cell.key = key;
}
cell.hash = hash;
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
/// inform caller
on_cell_updated(key, cell_idx);
/// mark corresponding id as found
remaining_keys[key] = true;
}
}
stream->readSuffix();
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
}
size_t found_num = 0;
size_t not_found_num = 0;
const auto now = std::chrono::system_clock::now();
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.getMapped())
{
++found_num;
continue;
}
++not_found_num;
auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;
auto & cell = cells[cell_idx];
/// Set null_value for each attribute
for (auto & attribute : attributes)
setDefaultAttributeValue(attribute, cell_idx);
/// Check if cell had not been occupied before and increment element counter if it hadn't
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
element_count.fetch_add(1, std::memory_order_relaxed);
if (key == cell.key)
key = cell.key;
else
{
if (cell.key.data)
freeKey(cell.key);
/// copy key from temporary pool
key = copyKey(key);
cell.key = key;
}
cell.hash = hash;
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
cell.setDefault();
/// inform caller that the cell has not been found
on_key_not_found(key, cell_idx);
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
}
AbsentKeyHandler && on_key_not_found) const;
UInt64 getCellIdx(const StringRef key) const;

View File

@ -1,45 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Creates the storage for one attribute: remembers its null value, allocates a container of
/// `size` elements of the underlying type and accounts for that memory in `bytes_allocated`.
/// For string attributes the string arena is additionally created if it does not exist yet.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    switch (type)
    {
        case AttributeUnderlyingType::utString:
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// The arena is created lazily, on the first string attribute only.
            if (string_arena == nullptr)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;
        }

/// All non-string types differ only in the element type of the container.
#define ALLOCATE_FIXED_WIDTH(TYPE) \
        case AttributeUnderlyingType::ut##TYPE: \
        { \
            result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
            result.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
            bytes_allocated += size * sizeof(TYPE); \
            break; \
        }

        ALLOCATE_FIXED_WIDTH(UInt8)
        ALLOCATE_FIXED_WIDTH(UInt16)
        ALLOCATE_FIXED_WIDTH(UInt32)
        ALLOCATE_FIXED_WIDTH(UInt64)
        ALLOCATE_FIXED_WIDTH(UInt128)
        ALLOCATE_FIXED_WIDTH(Int8)
        ALLOCATE_FIXED_WIDTH(Int16)
        ALLOCATE_FIXED_WIDTH(Int32)
        ALLOCATE_FIXED_WIDTH(Int64)
        ALLOCATE_FIXED_WIDTH(Decimal32)
        ALLOCATE_FIXED_WIDTH(Decimal64)
        ALLOCATE_FIXED_WIDTH(Decimal128)
        ALLOCATE_FIXED_WIDTH(Float32)
        ALLOCATE_FIXED_WIDTH(Float64)

#undef ALLOCATE_FIXED_WIDTH
    }

    return result;
}
}

View File

@ -1,32 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(attribute.null_values); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,35 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,35 +0,0 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
namespace DB
{
#define DEFINE(TYPE) \
void ComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
}
DEFINE(UInt8)
DEFINE(UInt16)
DEFINE(UInt32)
DEFINE(UInt64)
DEFINE(UInt128)
DEFINE(Int8)
DEFINE(Int16)
DEFINE(Int32)
DEFINE(Int64)
DEFINE(Float32)
DEFINE(Float64)
DEFINE(Decimal32)
DEFINE(Decimal64)
DEFINE(Decimal128)
#undef DEFINE
}

View File

@ -1,78 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Stores `value` into slot `idx` of the attribute's container.
/// Numeric values are read from the Field using the listed Field type and assigned to the
/// container element. Strings are copied into `string_arena`, releasing the previously stored
/// string unless it aliases the attribute's null value.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
    switch (attribute.type)
    {
/// TYPE is the container element type, FIELD_TYPE is the type the Field is read as.
#define STORE_FROM_FIELD(TYPE, FIELD_TYPE) \
        case AttributeUnderlyingType::ut##TYPE: \
            std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
            break;

        STORE_FROM_FIELD(UInt8, UInt64)
        STORE_FROM_FIELD(UInt16, UInt64)
        STORE_FROM_FIELD(UInt32, UInt64)
        STORE_FROM_FIELD(UInt64, UInt64)
        STORE_FROM_FIELD(UInt128, UInt128)
        STORE_FROM_FIELD(Int8, Int64)
        STORE_FROM_FIELD(Int16, Int64)
        STORE_FROM_FIELD(Int32, Int64)
        STORE_FROM_FIELD(Int64, Int64)
        STORE_FROM_FIELD(Float32, Float64)
        STORE_FROM_FIELD(Float64, Float64)
        STORE_FROM_FIELD(Decimal32, Decimal32)
        STORE_FROM_FIELD(Decimal64, Decimal64)
        STORE_FROM_FIELD(Decimal128, Decimal128)

#undef STORE_FROM_FIELD

        case AttributeUnderlyingType::utString:
        {
            const auto & new_value = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & default_string = std::get<String>(attribute.null_values);

            /// A slot that points at the null value does not own arena memory, so only
            /// other non-empty slots are freed.
            const bool owns_arena_memory = stored.data && stored.data != default_string.data();
            if (owns_arena_memory)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto length = new_value.size();
            if (length == 0)
            {
                /// Empty strings take no arena memory at all.
                stored = {};
                break;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_value.data(), new_value.data() + length, buffer);
            stored = StringRef{buffer, length};
            break;
        }
    }
}
}

View File

@ -1,71 +0,0 @@
#include "ComplexKeyCacheDictionary.h"
namespace DB
{
/// Resets slot `idx` of the attribute's container to the attribute's null value.
/// For strings the slot is pointed at the null value itself (no copy is made); the string
/// previously stored there, if any, is returned to `string_arena`.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
    switch (attribute.type)
    {
#define RESET_TO_NULL_VALUE(TYPE) \
        case AttributeUnderlyingType::ut##TYPE: \
            std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
            break;

        RESET_TO_NULL_VALUE(UInt8)
        RESET_TO_NULL_VALUE(UInt16)
        RESET_TO_NULL_VALUE(UInt32)
        RESET_TO_NULL_VALUE(UInt64)
        RESET_TO_NULL_VALUE(UInt128)
        RESET_TO_NULL_VALUE(Int8)
        RESET_TO_NULL_VALUE(Int16)
        RESET_TO_NULL_VALUE(Int32)
        RESET_TO_NULL_VALUE(Int64)
        RESET_TO_NULL_VALUE(Float32)
        RESET_TO_NULL_VALUE(Float64)
        RESET_TO_NULL_VALUE(Decimal32)
        RESET_TO_NULL_VALUE(Decimal64)
        RESET_TO_NULL_VALUE(Decimal128)

#undef RESET_TO_NULL_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & default_string = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already pointing at the null value: nothing to free, nothing to change.
            if (stored.data == default_string.data())
                break;

            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{default_string};
            break;
        }
    }
}
}

View File

@ -3,6 +3,9 @@
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace DB
{
@ -31,194 +34,151 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
createAttributes();
}
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
ColumnPtr ComplexKeyDirectDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return String(null_value.data, null_value.size); });
}
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
}
auto keys_size = key_columns.front()->size();
#define DECLARE(TYPE) \
void ComplexKeyDirectDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyDirectDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return def; });
}
void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = attributes.front();
switch (attribute.type)
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, key_columns, out);
break;
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<String, String>(
attribute,
key_columns,
[&](const size_t row, const String value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
const auto ref = StringRef{value};
out->insertData(ref.data, ref.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
/// Returns a UInt8 column with one value per row of `key_columns`: 1 if the row's key was
/// present in the blocks read back from `source_ptr->loadKeys`, 0 otherwise.
/// All rows are requested from the source on every call; `query_count` grows by the row count.
ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
    dict_struct.validateKeyTypes(key_types);

    const auto rows = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();

    auto result = ColumnUInt8::create(rows);
    auto & out = result->getData();

    /// Serialize the key of every requested row and mark it as "not found yet".
    StringRefs key_parts(keys_size);
    MapType<UInt8> presence_by_key;
    Arena temporary_keys_pool;
    std::vector<size_t> to_load(rows);
    PODArray<StringRef> requested_keys(rows);

    for (size_t row = 0; row < rows; ++row)
    {
        const StringRef key = placeKeysInPool(row, key_columns, key_parts, *dict_struct.key, temporary_keys_pool);
        requested_keys[row] = key;
        presence_by_key[key] = 0;
        to_load[row] = row;
    }

    auto stream = source_ptr->loadKeys(key_columns, to_load);
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        /// The first `keys_size` columns of a block are read as the key columns.
        const auto block_key_columns = ext::map<Columns>(
            ext::range(0, keys_size), [&](const size_t position) { return block.safeGetByPosition(position).column; });

        Arena block_keys_pool;
        StringRefs block_key_parts(keys_size);
        const auto block_rows = block_key_columns.front()->size();

        for (size_t block_row = 0; block_row < block_rows; ++block_row)
        {
            const StringRef key = placeKeysInPool(block_row, block_key_columns, block_key_parts, *dict_struct.key, block_keys_pool);

            /// Keys that were not requested are ignored rather than inserted.
            auto found = presence_by_key.find(key);
            if (found)
                found->getMapped() = 1;
        }
    }

    stream->readSuffix();

    for (size_t row = 0; row < rows; ++row)
        out[row] = presence_by_key[requested_keys[row]];

    query_count.fetch_add(rows, std::memory_order_relaxed);

    return result;
}
void ComplexKeyDirectDictionary::createAttributes()
{
@ -229,7 +189,7 @@ void ComplexKeyDirectDictionary::createAttributes()
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attribute_name_by_index.emplace(attributes.size(), attribute.name);
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
if (attribute.hierarchical)
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@ -237,7 +197,6 @@ void ComplexKeyDirectDictionary::createAttributes()
}
}
template <typename T>
void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
@ -254,59 +213,19 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
}
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
{
Attribute attr{type, {}, {}, attr_name};
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
switch (type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
@ -356,14 +275,18 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void ComplexKeyDirectDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto rows = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys_array(keys_size);
MapType<OutputType> value_by_key;
HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
Arena temporary_keys_pool;
std::vector<size_t> to_load(rows);
PODArray<StringRef> keys(rows);
@ -372,8 +295,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
{
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
keys[row] = key;
value_by_key[key] = get_default(row);
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
to_load[row] = row;
value_is_null[key] = false;
}
auto stream = source_ptr->loadKeys(key_columns, to_load);
@ -392,6 +316,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
});
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
if (attribute.name != attribute_name_by_index.at(attribute_idx))
{
continue;
}
const IColumn & attribute_column = *attribute_columns[attribute_idx];
Arena pool;
@ -402,17 +331,15 @@ void ComplexKeyDirectDictionary::getItemsImpl(
for (const auto row_idx : ext::range(0, columns_size))
{
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
if (value_by_key.has(key))
{
if (attribute.type == AttributeUnderlyingType::utFloat32)
{
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
}
auto value = attribute_column[row_idx];
if (value.isNull())
value_is_null[key] = true;
else
{
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
}
value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
}
}
}
@ -422,78 +349,13 @@ void ComplexKeyDirectDictionary::getItemsImpl(
for (const auto row : ext::range(0, rows))
{
set_value(row, value_by_key[keys[row]]);
auto key = keys[row];
set_value(row, value_by_key[key], value_is_null[key]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void ComplexKeyDirectDictionary::getItemsStringImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys_array(keys_size);
MapType<String> value_by_key;
Arena temporary_keys_pool;
std::vector<size_t> to_load(rows);
PODArray<StringRef> keys(rows);
for (const auto row : ext::range(0, rows))
{
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
keys[row] = key;
value_by_key[key] = get_default(row);
to_load[row] = row;
}
auto stream = source_ptr->loadKeys(key_columns, to_load);
const auto attributes_size = attributes.size();
stream->readPrefix();
while (const auto block = stream->read())
{
const auto columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *attribute_columns[attribute_idx];
Arena pool;
StringRefs keys_temp(keys_size);
const auto columns_size = columns.front()->size();
for (const auto row_idx : ext::range(0, columns_size))
{
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
{
const String from_source = attribute_column[row_idx].template get<String>();
value_by_key[key] = from_source;
}
}
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
{
set_value(row, value_by_key[keys[row]]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
{
const auto it = attribute_index_by_name.find(attribute_name);
@ -503,65 +365,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
return attributes[it->second];
}
template <typename T>
void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
{
const auto rows = key_columns.front()->size();
const auto keys_size = dict_struct.key->size();
StringRefs keys_array(keys_size);
MapType<UInt8> has_key;
Arena temporary_keys_pool;
std::vector<size_t> to_load(rows);
PODArray<StringRef> keys(rows);
for (const auto row : ext::range(0, rows))
{
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
keys[row] = key;
has_key[key] = 0;
to_load[row] = row;
}
auto stream = source_ptr->loadKeys(key_columns, to_load);
stream->readPrefix();
while (const auto block = stream->read())
{
const auto columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
Arena pool;
StringRefs keys_temp(keys_size);
const auto columns_size = columns.front()->size();
for (const auto row_idx : ext::range(0, columns_size))
{
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
if (has_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
{
has_key[key] = 1;
}
}
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
{
out[row] = has_key[keys[row]];
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
{
return source_ptr->loadAll();

View File

@ -12,14 +12,13 @@
#include <ext/range.h>
#include <ext/size.h>
#include <ext/map.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyDirectDictionary final : public IDictionaryBase
{
@ -60,78 +59,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -142,6 +79,8 @@ private:
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -168,27 +107,21 @@ private:
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
template <typename Pool>
StringRef placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsStringImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
@ -197,9 +130,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;

View File

@ -1,6 +1,10 @@
#include "ComplexKeyHashedDictionary.h"
#include <ext/map.h>
#include <ext/range.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
@ -32,216 +36,111 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
calculateBytesAllocated();
}
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
ColumnPtr ComplexKeyHashedDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
dict_struct.validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
auto keys_size = key_columns.front()->size();
#define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
dict_struct.validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t row, const StringRef value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
auto& out = result->getData();
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utString:
has<StringRef>(attribute, key_columns, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, key_columns, out);
break;
}
has<ValueType>(attribute, key_columns, out);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void ComplexKeyHashedDictionary::createAttributes()
@ -252,7 +151,7 @@ void ComplexKeyHashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@ -407,66 +306,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = map_ref.getBufferSizeInCells();
}
/// String specialization of the per-attribute size accounting.
/// String attributes keep their values in a ContainerType<StringRef> map and own a separate
/// string arena, so both contributions are added to bytes_allocated here.
template <>
void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    const auto & string_map = std::get<ContainerType<StringRef>>(attribute.maps);

    /// The map object itself plus the buffer it reports.
    bytes_allocated += sizeof(ContainerType<StringRef>) + string_map.getBufferSizeInBytes();
    /// The arena object plus whatever size the arena reports for itself.
    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();

    /// Overwritten (not accumulated) with the cell count of this attribute's map.
    bucket_count = string_map.getBufferSizeInCells();
}
void ComplexKeyHashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
bytes_allocated += keys_pool.size();
@ -479,73 +342,41 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
attribute.maps.emplace<ContainerType<T>>();
}
ComplexKeyHashedDictionary::Attribute
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
attribute.maps.emplace<ContainerType<StringRef>>();
}
switch (type)
ComplexKeyHashedDictionary::Attribute
ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
{
auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
attr.maps.emplace<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void ComplexKeyHashedDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
@ -560,7 +391,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
const auto it = attr.find(key);
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
if (it)
{
set_value(i, static_cast<OutputType>(it->getMapped()), false);
}
else
{
if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
set_value(i, default_value_extractor[i], true);
else
set_value(i, default_value_extractor[i], false);
}
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -578,51 +420,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
return pair.second;
}
/// String specialization: the characters of `value` are first inserted into the attribute's
/// string arena, and the StringRef overload is then called with a reference to that arena copy
/// rather than to the `value` parameter.
/// Returns whatever the StringRef overload returns.
template <>
bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
{
    const size_t value_size = value.size();
    const char * arena_copy = attribute.string_arena->insert(value.data(), value_size);

    return setAttributeValueImpl<StringRef>(attribute, key, StringRef{arena_copy, value_size});
}
bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
{
switch (attribute.type)
bool result = false;
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if (attribute.is_nullable)
{
auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
return pair.second;
if (value.isNull())
{
attribute.nullable_set->insert(key);
result = true;
return;
}
else
{
attribute.nullable_set->erase(key);
}
}
}
return {};
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
@ -673,6 +506,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
const auto it = attr.find(key);
out[i] = static_cast<bool>(it);
if (attribute.is_nullable && !out[i])
out[i] = attribute.nullable_set->find(key) != nullptr;
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
}
@ -684,41 +520,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
{
const Attribute & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::utUInt8:
return getKeys<UInt8>(attribute);
case AttributeUnderlyingType::utUInt16:
return getKeys<UInt16>(attribute);
case AttributeUnderlyingType::utUInt32:
return getKeys<UInt32>(attribute);
case AttributeUnderlyingType::utUInt64:
return getKeys<UInt64>(attribute);
case AttributeUnderlyingType::utUInt128:
return getKeys<UInt128>(attribute);
case AttributeUnderlyingType::utInt8:
return getKeys<Int8>(attribute);
case AttributeUnderlyingType::utInt16:
return getKeys<Int16>(attribute);
case AttributeUnderlyingType::utInt32:
return getKeys<Int32>(attribute);
case AttributeUnderlyingType::utInt64:
return getKeys<Int64>(attribute);
case AttributeUnderlyingType::utFloat32:
return getKeys<Float32>(attribute);
case AttributeUnderlyingType::utFloat64:
return getKeys<Float64>(attribute);
case AttributeUnderlyingType::utString:
return getKeys<StringRef>(attribute);
std::vector<StringRef> result;
case AttributeUnderlyingType::utDecimal32:
return getKeys<Decimal32>(attribute);
case AttributeUnderlyingType::utDecimal64:
return getKeys<Decimal64>(attribute);
case AttributeUnderlyingType::utDecimal128:
return getKeys<Decimal128>(attribute);
}
return {};
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
if constexpr (std::is_same_v<AttributeType, String>)
{
result = getKeys<StringRef>(attribute);
}
else
{
result = getKeys<AttributeType>(attribute);
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
template <typename T>
@ -730,12 +551,18 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
for (const auto & key : attr)
keys.push_back(key.getKey());
if (attribute.is_nullable)
{
for (const auto & key: *attribute.nullable_set)
keys.push_back(key.getKey());
}
return keys;
}
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
}

View File

@ -7,17 +7,17 @@
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Core/Block.h>
#include <common/StringRef.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyHashedDictionary final : public IDictionaryBase
{
@ -60,91 +60,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -152,9 +77,14 @@ private:
template <typename Value>
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::unique_ptr<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -170,7 +100,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
ContainerType<UInt8>,
@ -206,18 +136,21 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;

View File

@ -25,12 +25,10 @@ namespace ErrorCodes
/* BlockInputStream implementation for external dictionaries
* read() returns blocks consisting of the in-memory contents of the dictionaries
*/
template <typename DictionaryType, typename Key>
template <typename Key>
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
@ -60,111 +58,9 @@ protected:
Block getBlock(size_t start, size_t size) const override;
private:
// pointer types to getXXX functions
// for single key dictionaries
template <typename Type>
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
template <typename Type>
using DictionaryDecimalGetter
= void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
// for complex complex key dictionaries
template <typename Type>
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
template <typename Type>
using DecimalGetterByKey
= void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
// call getXXX
// for single key dictionaries
template <typename Type, typename Container>
void callGetter(
DictionaryGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container>
void callGetter(
DictionaryDecimalGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container>
void callGetter(
DictionaryStringGetter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
// for complex complex key dictionaries
template <typename Type, typename Container>
void callGetter(
GetterByKey<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container>
void callGetter(
DecimalGetterByKey<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container>
void callGetter(
StringGetterByKey getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
Block
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Getter>
ColumnPtr getColumnFromStringAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
void fillKeyColumns(
@ -174,65 +70,54 @@ private:
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & columns) const;
DictionaryPtr dictionary;
std::shared_ptr<const IDictionaryBase> dictionary;
Names column_names;
PaddedPODArray<Key> ids;
ColumnsWithTypeAndName key_columns;
Poco::Logger * logger;
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
FillBlockFunction fill_block_function;
Columns data_columns;
GetColumnsFunction get_key_columns_function;
GetColumnsFunction get_view_columns_function;
enum class DictionaryKeyType
enum class DictionaryInputStreamKeyType
{
Id,
ComplexKey,
Callback
};
DictionaryKeyType key_type;
DictionaryInputStreamKeyType key_type;
};
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, ids(std::move(ids_))
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
, key_type(DictionaryKeyType::Id)
, key_type(DictionaryInputStreamKeyType::Id)
{
}
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_,
UInt64 max_block_size_,
const std::vector<StringRef> & keys,
const Names & column_names_)
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
, key_type(DictionaryKeyType::ComplexKey)
, key_type(DictionaryInputStreamKeyType::ComplexKey)
{
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
}
template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename Key>
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_,
UInt64 max_block_size_,
const Columns & data_columns_,
@ -240,24 +125,23 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
GetColumnsFunction && get_key_columns_function_,
GetColumnsFunction && get_view_columns_function_)
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
, dictionary(dictionary_)
, column_names(column_names_)
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
, data_columns(data_columns_)
, get_key_columns_function(get_key_columns_function_)
, get_view_columns_function(get_view_columns_function_)
, key_type(DictionaryKeyType::Callback)
, get_key_columns_function(std::move(get_key_columns_function_))
, get_view_columns_function(std::move(get_view_columns_function_))
, key_type(DictionaryInputStreamKeyType::Callback)
{
}
template <typename DictionaryType, typename Key>
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
template <typename Key>
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
{
/// TODO: Rewrite
switch (key_type)
{
case DictionaryKeyType::ComplexKey:
case DictionaryInputStreamKeyType::ComplexKey:
{
Columns columns;
ColumnsWithTypeAndName view_columns;
@ -268,16 +152,16 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
columns.emplace_back(column);
view_columns.emplace_back(column, key_column.type, key_column.name);
}
return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
return fillBlock({}, columns, {}, std::move(view_columns));
}
case DictionaryKeyType::Id:
case DictionaryInputStreamKeyType::Id:
{
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
return fillBlock(ids_to_fill, {}, {}, {});
}
case DictionaryKeyType::Callback:
case DictionaryInputStreamKeyType::Callback:
{
Columns columns;
columns.reserve(data_columns.size());
@ -294,102 +178,15 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
columns.push_back(key_column.column);
types.push_back(key_column.type);
}
return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
}
}
throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
}
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, ids_to_fill, container);
}
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryDecimalGetter<Type> getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, ids_to_fill, container);
}
template <typename DictionaryType, typename Key>
template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryStringGetter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, ids_to_fill, container);
}
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
GetterByKey<Type> getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, keys, data_types, container);
}
template <typename DictionaryType, typename Key>
template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DecimalGetterByKey<Type> getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, keys, data_types, container);
}
template <typename DictionaryType, typename Key>
template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
StringGetterByKey getter,
const PaddedPODArray<Key> & /*ids_to_fill*/,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
(dict.*getter)(attribute.name, keys, data_types, container);
}
template <typename DictionaryType, typename Key>
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
template <typename Key>
Block DictionaryBlockInputStream<Key>::fillBlock(
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
{
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
@ -408,9 +205,14 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
block_columns.push_back(column);
const DictionaryStructure & structure = dictionary->getStructure();
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
if (structure.id && names.find(structure.id->name) != names.end())
block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
{
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
}
auto dictionary_key_type = dictionary->getKeyType();
for (const auto idx : ext::range(0, structure.attributes.size()))
{
@ -418,126 +220,35 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
switch (attribute.underlying_type)
if (dictionary_key_type == DictionaryKeyType::simple)
{
case AttributeUnderlyingType::utUInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::utUInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::utUInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::utUInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::utUInt128:
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
break;
case AttributeUnderlyingType::utInt8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::utInt16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::utInt32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::utInt64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::utFloat32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::utFloat64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::utDecimal32:
{
column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
&DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utDecimal64:
{
column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
&DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utDecimal128:
{
column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
&DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
case AttributeUnderlyingType::utString:
{
column = getColumnFromStringAttribute<StringGetter>(
&DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
break;
}
column = dictionary->getColumn(
attribute.name,
attribute.type,
{ids_column},
{std::make_shared<DataTypeUInt64>()},
nullptr /* default_values_column */);
}
#undef GET_COLUMN_FORM_ATTRIBUTE
else
{
column = dictionary->getColumn(
attribute.name,
attribute.type,
keys,
data_types,
nullptr /* default_values_column*/);
}
block_columns.emplace_back(column, attribute.type, attribute.name);
}
}
return Block(block_columns);
}
template <typename DictionaryType, typename Key>
template <typename AttributeType, typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
if constexpr (IsDecimalNumber<AttributeType>)
{
auto size = ids_to_fill.size();
if (!keys.empty())
size = keys.front()->size();
auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
return column;
}
else
{
auto size = ids_to_fill.size();
if (!keys.empty())
size = keys.front()->size();
auto column_vector = ColumnVector<AttributeType>::create(size);
callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
return column_vector;
}
}
template <typename DictionaryType, typename Key>
template <typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{
auto column_string = ColumnString::create();
auto ptr = column_string.get();
callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
return column_string;
}
template <typename DictionaryType, typename Key>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
template <typename Key>
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
{
auto column_vector = ColumnVector<UInt64>::create();
column_vector->getData().reserve(ids_to_fill.size());
@ -547,8 +258,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
}
template <typename DictionaryType, typename Key>
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
template <typename Key>
void DictionaryBlockInputStream<Key>::fillKeyColumns(
const std::vector<StringRef> & keys,
size_t start,
size_t size,

View File

@ -0,0 +1,149 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryStructure.h"
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
}
/**
* In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
*/
template <typename DictionaryAttributeType>
using DictionaryValueType =
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
/**
 * Creates a mutable result column of the concrete type that matches DictionaryAttributeType:
 * ColumnString for String, ColumnDecimal<T> for decimal types, ColumnVector<T> otherwise.
 */
template <typename DictionaryAttributeType>
class DictionaryAttributeColumnProvider
{
public:
    using ColumnType =
        std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
            std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
                ColumnVector<DictionaryAttributeType>>>;

    using ColumnPtr = typename ColumnType::MutablePtr;

    /** Builds the column for `dictionary_attribute`.
      * - String: the column is created empty, `size` is not used.
      * - Decimal: the column is created with `size` elements and the scale taken from
      *   `dictionary_attribute.nested_type`.
      * - Other numbers: the column is created with `size` elements.
      * Throws TYPE_MISMATCH for any other attribute type.
      */
    static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
    {
        /// One if-constexpr chain, so exactly one branch is instantiated per attribute type.
        /// (Previously the String case was a separate `if constexpr`, which left the
        /// `throw` branch of the second chain instantiated as dead code after the return.)
        if constexpr (std::is_same_v<DictionaryAttributeType, String>)
        {
            return ColumnType::create();
        }
        else if constexpr (IsDecimalNumber<DictionaryAttributeType>)
        {
            auto scale = getDecimalScale(*dictionary_attribute.nested_type);
            return ColumnType::create(size, scale);
        }
        else if constexpr (IsNumber<DictionaryAttributeType>)
            return ColumnType::create(size);
        else
            throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
    }
};
/**
 * DictionaryDefaultValueExtractor simplifies getting the default value inside IDictionary `getColumn`.
 * The default for a given row is obtained with operator[].
 *
 * If default_values_column is null, attribute_default_value is returned for every row.
 * If default_values_column is a full column of the attribute's column type, its value at the
 * requested row is returned.
 * If default_values_column is a constant column of that type, its single value replaces
 * attribute_default_value and is returned for every row.
 * Any other column type results in a TYPE_MISMATCH exception.
 *
 * Only a raw pointer to the full column is stored, so the column passed to the constructor
 * must stay alive (owned elsewhere) for as long as the extractor is used.
 */
template <typename DictionaryAttributeType>
class DictionaryDefaultValueExtractor
{
    using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;

public:
    using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;

    DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
        : default_value(std::move(attribute_default_value))
    {
        /// No column supplied: the attribute-level default is the only source.
        if (default_values_column_ == nullptr)
        {
            use_default_value_from_column = false;
            return;
        }

        /// Full column: defaults are read per row.
        if (const auto * const per_row_defaults = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
        {
            default_values_column = per_row_defaults;
            use_default_value_from_column = true;
            return;
        }

        /// Constant column: take its single value once and use it for all rows.
        if (const auto * const constant_default = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
        {
            default_value = constant_default->template getValue<DictionaryAttributeType>();
            use_default_value_from_column = false;
            return;
        }

        throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
    }

    /// Returns the default value for `row`. `row` is not bounds-checked here.
    DefaultValueType operator[](size_t row)
    {
        if (use_default_value_from_column)
        {
            assert(default_values_column != nullptr);

            if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
                return default_values_column->getDataAt(row);
            else
                return default_values_column->getData()[row];
        }

        return static_cast<DefaultValueType>(default_value);
    }

private:
    DictionaryAttributeType default_value;
    const DefaultColumnType * default_values_column = nullptr;
    bool use_default_value_from_column = false;
};
/**
 * Returns the data of a ColumnVector<T> as a PaddedPODArray<T>.
 * If `column` is constant, it is expanded to a full column and the values are copied into
 * `backup_storage`, which is then returned; otherwise the column's own data is returned.
 * Throws TYPE_MISMATCH (message is prefixed with the dictionary name) if the column is not
 * a ColumnVector<T>.
 *
 * NOTE(review): in the non-constant case the returned reference points into the column's
 * storage, so presumably the caller must keep the column alive while using it - confirm.
 */
template <typename T>
static const PaddedPODArray<T> & getColumnVectorData(
    const IDictionaryBase * dictionary,
    const ColumnPtr column,
    PaddedPODArray<T> & backup_storage)
{
    /// Remember constness before materializing: it decides where the result lives.
    const bool was_constant = isColumnConst(*column);
    const auto materialized = column->convertToFullColumnIfConst();

    const auto * typed_column = checkAndGetColumn<ColumnVector<T>>(materialized.get());
    if (typed_column == nullptr)
    {
        throw Exception{ErrorCodes::TYPE_MISMATCH,
            "{}: type mismatch: column has wrong type expected {}",
            dictionary->getDictionaryID().getNameForLogs(),
            TypeName<T>::get()};
    }

    if (!was_constant)
        return typed_column->getData();

    /// The materialized column is a local temporary, so its values are copied into the
    /// caller-provided storage before returning a reference.
    backup_storage.assign(typed_column->getData());
    return backup_storage;
}
}

View File

@ -2,6 +2,8 @@
#include <Columns/IColumn.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
@ -12,7 +14,6 @@
#include <unordered_set>
#include <ext/range.h>
namespace DB
{
namespace ErrorCodes
@ -41,54 +42,46 @@ namespace
}
/// Maps a data type to the underlying storage type used for dictionary attributes.
/// Date, DateTime, DateTime64 and UUID are mapped to the unsigned integer type listed for
/// them below; Array is mapped to utString as a temporary hack (see the comment in the switch).
/// Throws UNKNOWN_TYPE for any type that has no mapping.
AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
{
    auto type_index = type->getTypeId();

    switch (type_index)
    {
        case TypeIndex::UInt8:          return AttributeUnderlyingType::utUInt8;
        case TypeIndex::UInt16:         return AttributeUnderlyingType::utUInt16;
        case TypeIndex::UInt32:         return AttributeUnderlyingType::utUInt32;
        case TypeIndex::UInt64:         return AttributeUnderlyingType::utUInt64;
        case TypeIndex::UInt128:        return AttributeUnderlyingType::utUInt128;

        case TypeIndex::Int8:           return AttributeUnderlyingType::utInt8;
        case TypeIndex::Int16:          return AttributeUnderlyingType::utInt16;
        case TypeIndex::Int32:          return AttributeUnderlyingType::utInt32;
        case TypeIndex::Int64:          return AttributeUnderlyingType::utInt64;

        case TypeIndex::Float32:        return AttributeUnderlyingType::utFloat32;
        case TypeIndex::Float64:        return AttributeUnderlyingType::utFloat64;

        case TypeIndex::Decimal32:      return AttributeUnderlyingType::utDecimal32;
        case TypeIndex::Decimal64:      return AttributeUnderlyingType::utDecimal64;
        case TypeIndex::Decimal128:     return AttributeUnderlyingType::utDecimal128;

        case TypeIndex::Date:           return AttributeUnderlyingType::utUInt16;
        case TypeIndex::DateTime:       return AttributeUnderlyingType::utUInt32;
        case TypeIndex::DateTime64:     return AttributeUnderlyingType::utUInt64;

        case TypeIndex::UUID:           return AttributeUnderlyingType::utUInt128;

        case TypeIndex::String:         return AttributeUnderlyingType::utString;

        // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
        // TODO: This should be fixed by fully supporting arrays in dictionaries.
        case TypeIndex::Array:          return AttributeUnderlyingType::utString;

        default: break;
    }

    /// The type name is separated from the surrounding text; the previous message
    /// concatenated it directly after "dictionary" with no space.
    throw Exception{"Unknown type " + type->getName() + " for dictionary", ErrorCodes::UNKNOWN_TYPE};
}
@ -215,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
for (const auto i : ext::range(0, key_types.size()))
{
const auto & expected_type = (*key)[i].type->getName();
const auto & actual_type = key_types[i]->getName();
const auto & expected_type = (*key)[i].type;
const auto & actual_type = key_types[i];
if (expected_type != actual_type)
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
+ actual_type,
ErrorCodes::TYPE_MISMATCH};
if (!areTypesEqual(expected_type, actual_type))
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
+ actual_type->getName(),
ErrorCodes::TYPE_MISMATCH};
}
}
/// Finds the attribute named `attribute_name` among `attributes` and verifies, via areTypesEqual,
/// that its declared type matches `type`.
/// Throws BAD_ARGUMENTS when no attribute has that name and TYPE_MISMATCH when the types differ.
const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
{
    const auto has_requested_name = [&](const auto & candidate) { return candidate.name == attribute_name; };
    const auto position = std::find_if(attributes.begin(), attributes.end(), has_requested_name);

    if (position == attributes.end())
        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};

    const auto & found = *position;
    if (areTypesEqual(found.type, type))
        return found;

    throw Exception{
        "Attribute type does not match, expected " + found.type->getName() + ", found " + type->getName(),
        ErrorCodes::TYPE_MISMATCH};
}
std::string DictionaryStructure::getKeyDescription() const
{
@ -318,9 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
continue;
const auto type_string = config.getString(prefix + "type");
const auto type = DataTypeFactory::instance().get(type_string);
const auto underlying_type = getAttributeUnderlyingType(type_string);
const auto initial_type = DataTypeFactory::instance().get(type_string);
auto type = initial_type;
bool is_array = false;
bool is_nullable = false;
if (type->isNullable())
{
is_nullable = true;
type = removeNullable(type);
}
const auto underlying_type = getAttributeUnderlyingType(type);
const auto expression = config.getString(prefix + "expression", "");
if (!expression.empty())
@ -333,7 +353,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
try
{
if (null_value_string.empty())
{
null_value = type->getDefault();
}
else
{
ReadBufferFromString null_value_buffer{null_value_string};
@ -365,8 +387,18 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
has_hierarchy = has_hierarchy || hierarchical;
res_attributes.emplace_back(
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
res_attributes.emplace_back(DictionaryAttribute{
name,
underlying_type,
initial_type,
type,
expression,
null_value,
hierarchical,
injective,
is_object_id,
is_nullable,
is_array});
}
return res_attributes;

View File

@ -42,7 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
/// Min and max lifetimes for a dictionary or its entry
using DictionaryLifetime = ExternalLoadableLifetime;
/** Holds the description of a single dictionary attribute:
* - name, used for lookup into dictionary and source;
* - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;
@ -57,13 +56,74 @@ struct DictionaryAttribute final
const std::string name;
const AttributeUnderlyingType underlying_type;
const DataTypePtr type;
const DataTypePtr nested_type;
const std::string expression;
const Field null_value;
const bool hierarchical;
const bool injective;
const bool is_object_id;
const bool is_nullable;
const bool is_array;
};
/// Empty tag type that carries a C++ type as the nested alias `AttributeType`,
/// so that a type can be handed to a generic callable as an ordinary argument.
template <typename T>
struct DictionaryAttributeType
{
    using AttributeType = T;
};
/// Calls `func` exactly once with a default-constructed DictionaryAttributeType<T> tag, where T is
/// the C++ type listed below for the given underlying type.
/// Every case is instantiated, so `func` has to accept each tag type (e.g. a generic lambda).
/// If `type` matches none of the listed enumerators, `func` is not called at all.
template <typename F>
void callOnDictionaryAttributeType(AttributeUnderlyingType type, F&& func)
{
    using UT = AttributeUnderlyingType;

    switch (type)
    {
        case UT::utUInt8:      func(DictionaryAttributeType<UInt8>{});      return;
        case UT::utUInt16:     func(DictionaryAttributeType<UInt16>{});     return;
        case UT::utUInt32:     func(DictionaryAttributeType<UInt32>{});     return;
        case UT::utUInt64:     func(DictionaryAttributeType<UInt64>{});     return;
        case UT::utUInt128:    func(DictionaryAttributeType<UInt128>{});    return;
        case UT::utInt8:       func(DictionaryAttributeType<Int8>{});       return;
        case UT::utInt16:      func(DictionaryAttributeType<Int16>{});      return;
        case UT::utInt32:      func(DictionaryAttributeType<Int32>{});      return;
        case UT::utInt64:      func(DictionaryAttributeType<Int64>{});      return;
        case UT::utFloat32:    func(DictionaryAttributeType<Float32>{});    return;
        case UT::utFloat64:    func(DictionaryAttributeType<Float64>{});    return;
        case UT::utString:     func(DictionaryAttributeType<String>{});     return;
        case UT::utDecimal32:  func(DictionaryAttributeType<Decimal32>{});  return;
        case UT::utDecimal64:  func(DictionaryAttributeType<Decimal64>{});  return;
        case UT::utDecimal128: func(DictionaryAttributeType<Decimal128>{}); return;
    }
}
struct DictionarySpecialAttribute final
{
@ -94,10 +154,10 @@ struct DictionaryStructure final
DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
void validateKeyTypes(const DataTypes & key_types) const;
const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
std::string getKeyDescription() const;
bool isKeySizeFixed() const;
size_t getKeySize() const;
private:
/// range_min and range_max have to be parsed before this function call
std::vector<DictionaryAttribute> getAttributes(

View File

@ -3,7 +3,10 @@
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Common/HashTable/HashSet.h>
namespace DB
{
@ -35,11 +38,13 @@ DirectDictionary::DirectDictionary(
void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
[&](const size_t row, const UInt64 value, bool) { out[row] = value; },
extractor);
}
@ -128,395 +133,101 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr DirectDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return String(null_value.data, null_value.size); });
}
auto keys_size = ids.size();
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return def; });
}
void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
}
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
void DirectDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
for (const auto & attribute : dict_struct.attributes)
auto type_call = [&](const auto &dictionary_attribute_type)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attribute_name_by_index.emplace(attributes.size(), attribute.name);
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
if (attribute.hierarchical)
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
hierarchical_attribute = &attributes.back();
auto * out = column.get();
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
}
}
}
template <typename T>
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
}
template <>
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
}
DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
{
Attribute attr{type, {}, {}, attr_name};
switch (type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
HashMap<Key, OutputType> value_by_key;
for (const auto row : ext::range(0, rows))
value_by_key[ids[row]] = get_default(row);
std::vector<Key> to_load;
to_load.reserve(value_by_key.size());
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
to_load.emplace_back(static_cast<Key>(it->getKey()));
auto stream = source_ptr->loadIds(to_load);
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
getItemsImpl<String, String>(
attribute,
ids,
[&](const size_t row, const String value, bool is_null)
{
if (attribute.type == AttributeUnderlyingType::utFloat32)
{
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].get<Float64>());
}
else
{
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>());
}
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
}
}
const auto ref = StringRef{value};
out->insertData(ref.data, ref.size);
},
default_value_extractor);
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
set_value(row, value_by_key[ids[row]]);
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsStringImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
HashMap<Key, String> value_by_key;
for (const auto row : ext::range(0, rows))
value_by_key[ids[row]] = get_default(row);
std::vector<Key> to_load;
to_load.reserve(value_by_key.size());
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
to_load.emplace_back(static_cast<Key>(it->getKey()));
auto stream = source_ptr->loadIds(to_load);
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
else
{
auto & out = column->getData();
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
getItemsImpl<AttributeType, AttributeType>(
attribute,
ids,
[&](const size_t row, const auto value, bool is_null)
{
const String from_source = attribute_column[row_idx].get<String>();
value_by_key[key] = from_source;
}
}
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
set_value(row, value_by_key[ids[row]]);
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name))
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
return attributes[it->second];
}
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
template <typename T>
void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto rows = ext::size(ids);
HashMap<Key, UInt8> has_key;
@ -548,6 +259,137 @@ void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, P
out[row] = has_key[ids[row]];
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
/// Builds one internal Attribute per entry of dict_struct.attributes, records the name <-> index
/// mappings, and remembers the hierarchical attribute (which must have underlying type UInt64).
void DirectDictionary::createAttributes()
{
    /// Capacity is reserved up front; assuming `attributes` is empty on entry, the pointer taken
    /// from attributes.back() below is not invalidated by the following push_back calls.
    attributes.reserve(dict_struct.attributes.size());

    for (const auto & description : dict_struct.attributes)
    {
        const auto position = attributes.size();
        attribute_index_by_name.emplace(description.name, position);
        attribute_name_by_index.emplace(position, description.name);
        attributes.push_back(createAttribute(description, description.null_value, description.name));

        if (!description.hierarchical)
            continue;

        hierarchical_attribute = &attributes.back();
        if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
            throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
    }
}
/// Stores the attribute's null value: reads `null_value` as NearestFieldType<T> and converts it to T.
template <typename T>
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
    const auto & field_value = null_value.get<NearestFieldType<T>>();
    attribute.null_values = T(field_value);
}
/// String specialization: the null value's bytes are copied into a freshly created arena owned by
/// the attribute, and null_values holds a StringRef pointing at that copy.
template <>
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
    attribute.string_arena = std::make_unique<Arena>();
    auto & arena = *attribute.string_arena;

    const String & default_text = null_value.get<String>();
    const auto length = default_text.size();
    const char * const arena_copy = arena.insert(default_text.data(), length);

    attribute.null_values.emplace<StringRef>(arena_copy, length);
}
/// Creates the internal Attribute for a dictionary attribute description: copies its underlying
/// type, nullability and name, then fills in the null value using the createAttributeImpl
/// instantiation selected by the underlying type.
DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & attr_name)
{
    Attribute result{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};

    callOnDictionaryAttributeType(attribute.underlying_type, [&](const auto & type_tag)
    {
        using ValueType = typename std::decay_t<decltype(type_tag)>::AttributeType;
        createAttributeImpl<ValueType>(result, null_value);
    });

    return result;
}
/// Fetches the values of `attribute` for the given ids directly from the source.
///
/// Steps:
///  1. Every requested key is seeded with the default taken from `default_value_extractor`
///     (when a key occurs in several rows, the default of the last such row is the one kept).
///  2. The distinct keys are requested from the source via loadIds.
///  3. For each returned block, column 0 is read as the ids and column (attribute index + 1) as the
///     attribute; NULL values are remembered per key, other values overwrite the seeded default.
///  4. `set_value(row, value, is_null)` is called for every input row, and query_count is
///     increased by the number of rows.
///
/// Throws BAD_ARGUMENTS if `attribute.name` is not present in attribute_index_by_name.
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void DirectDictionary::getItemsImpl(
    const Attribute & attribute,
    const PaddedPODArray<Key> & ids,
    ValueSetter && set_value,
    DefaultValueExtractor & default_value_extractor) const
{
    const auto rows = ext::size(ids);

    HashMap<Key, OutputType> value_by_key;
    HashSet<Key> value_is_null;

    /// Seed each key with its default value.
    for (size_t row = 0; row < rows; ++row)
    {
        auto requested_key = ids[row];
        value_by_key[requested_key] = static_cast<AttributeType>(default_value_extractor[row]);
    }

    /// The map holds each key once, so the source is asked for distinct ids only.
    std::vector<Key> to_load;
    to_load.reserve(value_by_key.size());
    for (auto cell_it = value_by_key.begin(); cell_it != value_by_key.end(); ++cell_it)
        to_load.emplace_back(static_cast<Key>(cell_it->getKey()));

    auto stream = source_ptr->loadIds(to_load);
    stream->readPrefix();

    const auto index_entry = attribute_index_by_name.find(attribute.name);
    if (index_entry == std::end(attribute_index_by_name))
        throw Exception{full_name + ": no such attribute '" + attribute.name + "'", ErrorCodes::BAD_ARGUMENTS};

    auto attribute_index = index_entry->second;

    while (const auto block = stream->read())
    {
        const IColumn & id_column = *block.safeGetByPosition(0).column;
        /// Attribute columns are read starting at position 1, right after the id column.
        const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + 1).column;

        const size_t block_rows = id_column.size();
        for (size_t row_idx = 0; row_idx < block_rows; ++row_idx)
        {
            const auto key = id_column[row_idx].get<UInt64>();

            /// Ignore rows for keys that were not requested.
            if (!(value_by_key.find(key) != value_by_key.end()))
                continue;

            auto field = attribute_column[row_idx];
            if (field.isNull())
                value_is_null.insert(key);
            else
                value_by_key[key] = static_cast<OutputType>(field.get<NearestFieldType<AttributeType>>());
        }
    }

    stream->readSuffix();

    /// Hand the results back in the order of the input rows.
    for (size_t row = 0; row < rows; ++row)
    {
        auto requested_key = ids[row];
        set_value(row, value_by_key[requested_key], value_is_null.find(requested_key) != nullptr);
    }

    query_count.fetch_add(rows, std::memory_order_relaxed);
}
/// Returns the internal attribute registered under `attribute_name`;
/// throws BAD_ARGUMENTS when the name is not present in attribute_index_by_name.
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);
    if (found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}

View File

@ -13,11 +13,10 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class DirectDictionary final : public IDictionary
{
@ -65,76 +64,16 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -142,6 +81,7 @@ private:
struct Attribute final
{
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -168,23 +108,17 @@ private:
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsStringImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
@ -193,9 +127,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
Key getValueOrNullByKey(const Key & to_find) const;
template <typename ChildType, typename AncestorType>

View File

@ -1,9 +1,14 @@
#include "FlatDictionary.h"
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
namespace DB
{
@ -44,12 +49,13 @@ FlatDictionary::FlatDictionary(
void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
extractor);
}
@ -102,186 +108,103 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr FlatDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto size = ids.size();
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void FlatDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
FlatDictionary::getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<String>(attribute, ids, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
ids,
[&](const size_t row, const auto value) { out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.nullable_set)
{
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
for (size_t row = 0; row < ids.size(); ++row)
{
auto id = ids[row];
if (attribute.nullable_set->find(id) != nullptr)
vec_null_map_to[row] = true;
}
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
/// Builds a UInt8 column with one entry per requested key: the entry is non-zero when the key
/// is inside the bounds of `loaded_ids` and its slot there is set, and zero otherwise.
/// Only the first key column is read; the key types argument is ignored.
/// `query_count` is increased by the number of keys examined.
ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
    PaddedPODArray<Key> backup_storage;
    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
    const auto keys_count = ext::size(ids);

    auto result = ColumnUInt8::create(keys_count);
    auto & found = result->getData();

    for (size_t row = 0; row < keys_count; ++row)
    {
        const auto key = ids[row];
        /// The bounds check comes first so that `loaded_ids` is never indexed past its end.
        const bool is_loaded = key < loaded_ids.size() && loaded_ids[key];
        found[row] = is_loaded;
    }

    query_count.fetch_add(keys_count, std::memory_order_relaxed);

    return result;
}
void FlatDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
@ -290,7 +213,7 @@ void FlatDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
{
@ -416,6 +339,14 @@ void FlatDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = array_ref.capacity();
}
/// String specialisation of the per-attribute size accounting.
/// Adds to `bytes_allocated` the StringRef array (object size plus its allocated bytes) and the
/// string arena (object size plus the arena's reported size), then records the array capacity
/// in `bucket_count`.
template <>
void FlatDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    const auto & string_refs = std::get<ContainerType<StringRef>>(attribute.arrays);

    const auto array_bytes = sizeof(PaddedPODArray<StringRef>) + string_refs.allocated_bytes();
    const auto arena_bytes = sizeof(Arena) + attribute.string_arena->size();

    bytes_allocated += array_bytes;
    bytes_allocated += arena_bytes;

    bucket_count = string_refs.capacity();
}
void FlatDictionary::calculateBytesAllocated()
{
@ -423,60 +354,15 @@ void FlatDictionary::calculateBytesAllocated()
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -500,67 +386,31 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
}
FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}};
switch (type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
createAttributeImpl<String>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
}
createAttributeImpl<AttributeType>(attr, null_value);
};
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void FlatDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
const auto rows = ext::size(ids);
@ -568,7 +418,7 @@ void FlatDictionary::getItemsImpl(
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row));
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : default_value_extractor[row]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
@ -592,7 +442,6 @@ void FlatDictionary::resize(Attribute & attribute, const Key id)
template <typename T>
void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value)
{
resize<T>(attribute, id);
auto & array = std::get<ContainerType<T>>(attribute.arrays);
array[id] = value;
loaded_ids[id] = true;
@ -601,64 +450,38 @@ void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id,
/// String specialisation: the bytes of `value` are inserted into the attribute's string_arena,
/// and the value stored for `id` is a StringRef built from the pointer returned by that insert
/// together with value.size().
/// NOTE(review): presumably the arena insert copies the bytes so the stored StringRef does not
/// depend on the lifetime of `value` - confirm against Arena::insert.
template <>
void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & value)
{
resize<StringRef>(attribute, id);
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
auto & array = std::get<ContainerType<StringRef>>(attribute.arrays);
array[id] = StringRef{string_in_arena, value.size()};
loaded_ids[id] = true;
setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()});
}
void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt16:
setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt32:
setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt64:
setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt128:
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
break;
case AttributeUnderlyingType::utInt8:
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt16:
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt32:
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt64:
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::utFloat32:
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::utFloat64:
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::utString:
setAttributeValueImpl<String>(attribute, id, value.get<String>());
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ResizeType = std::conditional_t<std::is_same_v<AttributeType, String>, StringRef, AttributeType>;
case AttributeUnderlyingType::utDecimal32:
setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
break;
case AttributeUnderlyingType::utDecimal64:
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
break;
case AttributeUnderlyingType::utDecimal128:
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
break;
}
resize<ResizeType>(attribute, id);
if (attribute.nullable_set)
{
if (value.isNull())
{
attribute.nullable_set->insert(id);
loaded_ids[id] = true;
return;
}
else
{
attribute.nullable_set->erase(id);
}
}
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
@ -671,27 +494,13 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
return attributes[it->second];
}
template <typename T>
void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto ids_count = ext::size(ids);
for (const auto i : ext::range(0, ids_count))
{
const auto id = ids[i];
out[i] = id < loaded_ids.size() && loaded_ids[id];
}
query_count.fetch_add(ids_count, std::memory_order_relaxed);
}
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
{
const auto ids_count = ext::size(loaded_ids);
PaddedPODArray<Key> ids;
ids.reserve(ids_count);
for (auto idx : ext::range(0, ids_count))
if (loaded_ids[idx])
ids.push_back(idx);
@ -700,7 +509,7 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
/// Creates a DictionaryBlockInputStream for this dictionary, constructed from shared_from_this(),
/// `max_block_size`, the ids returned by getIds() and the requested `column_names`.
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}

View File

@ -3,20 +3,25 @@
#include <atomic>
#include <variant>
#include <vector>
#include <optional>
#include <Common/HashTable/HashSet.h>
#include <Common/Arena.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Columns/ColumnArray.h>
#include <DataTypes/IDataType.h>
#include <Core/Block.h>
#include <ext/range.h>
#include <ext/size.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class FlatDictionary final : public IDictionary
{
@ -66,76 +71,16 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -143,9 +88,13 @@ private:
template <typename Value>
using ContainerType = PaddedPODArray<Value>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::optional<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -180,6 +129,7 @@ private:
ContainerType<Float64>,
ContainerType<StringRef>>
arrays;
std::unique_ptr<Arena> string_arena;
};
@ -194,13 +144,16 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void resize(Attribute & attribute, const Key id);
@ -212,9 +165,6 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
template <typename ChildType, typename AncestorType>
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;

View File

@ -4,7 +4,10 @@
#include "DictionaryFactory.h"
#include "ClickHouseDictionarySource.h"
#include <Core/Defines.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
namespace
{
@ -57,12 +60,13 @@ HashedDictionary::HashedDictionary(
/// Looks up the hierarchical attribute for every id in `ids` and stores the value found at `out[row]`.
/// The UInt64 null value of the hierarchical attribute is what is handed to getItemsImpl as the fallback.
/// NOTE(review): `out` is written by row index and is not resized here - presumably the caller sizes it
/// to match `ids`; confirm against callers.
void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
extractor);
}
@ -125,183 +129,105 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
ColumnPtr HashedDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto size = ids.size();
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<AttributeType, AttributeType>(
attribute,
ids,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.nullable_set)
{
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
for (size_t row = 0; row < ids.size(); ++row)
{
auto id = ids[row];
if (attribute.nullable_set->find(id) != nullptr)
vec_null_map_to[row] = true;
}
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result;
}
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
ColumnUInt8::Ptr HashedDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
size_t ids_count = ext::size(ids);
#define DECLARE(TYPE) \
void HashedDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
has<UInt8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt16:
has<UInt16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt32:
has<UInt32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt64:
has<UInt64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utUInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utInt64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utFloat64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utString:
has<StringRef>(attribute, ids, out);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
has<AttributeType>(attribute, ids, out);
};
case AttributeUnderlyingType::utDecimal32:
has<Decimal32>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::utDecimal128:
has<Decimal128>(attribute, ids, out);
break;
}
callOnDictionaryAttributeType(attribute.type, type_call);
query_count.fetch_add(ids_count, std::memory_order_relaxed);
return result;
}
void HashedDictionary::createAttributes()
@ -312,7 +238,7 @@ void HashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
{
@ -429,6 +355,13 @@ void HashedDictionary::resize(Attribute & attribute, size_t added_rows)
map_ref->resize(added_rows);
}
}
/// String specialisation: forwards to the StringRef instantiation of resize with the same arguments,
/// so callers that dispatch on the attribute type String end up resizing the StringRef container.
template <>
void HashedDictionary::resize<String>(Attribute & attribute, size_t added_rows)
{
resize<StringRef>(attribute, added_rows);
}
void HashedDictionary::resize(size_t added_rows)
{
if (!added_rows)
@ -436,56 +369,14 @@ void HashedDictionary::resize(size_t added_rows)
for (auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
resize<UInt8>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt16:
resize<UInt16>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt32:
resize<UInt32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt64:
resize<UInt64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utUInt128:
resize<UInt128>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt8:
resize<Int8>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt16:
resize<Int16>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt32:
resize<Int32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utInt64:
resize<Int64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utFloat32:
resize<Float32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utFloat64:
resize<Float64>(attribute, added_rows);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
resize<AttributeType>(attribute, added_rows);
};
case AttributeUnderlyingType::utDecimal32:
resize<Decimal32>(attribute, added_rows);
break;
case AttributeUnderlyingType::utDecimal64:
resize<Decimal64>(attribute, added_rows);
break;
case AttributeUnderlyingType::utDecimal128:
resize<Decimal128>(attribute, added_rows);
break;
case AttributeUnderlyingType::utString:
resize<StringRef>(attribute, added_rows);
break;
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -562,66 +453,27 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute)
}
}
/// Memory accounting for a String attribute.
/// First runs the StringRef accounting for the attribute, then adds the Arena
/// object itself plus the size reported by the attribute's string arena.
template <>
void HashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    addAttributeSize<StringRef>(attribute);

    const auto arena_footprint = sizeof(Arena) + attribute.string_arena->size();
    bytes_allocated += arena_footprint;
}
void HashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -635,93 +487,66 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field &
attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
}
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
switch (type)
if (!sparse)
attribute.maps = std::make_unique<CollectionType<StringRef>>();
else
attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
}
HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
auto type_call = [&, this](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
if (!sparse)
attr.maps = std::make_unique<CollectionType<StringRef>>();
else
attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
void HashedDictionary::getItemsAttrImpl(
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const MapType & attr,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto rows = ext::size(ids);
for (const auto i : ext::range(0, rows))
{
const auto it = attr.find(ids[i]);
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : default_value_extractor[i]);
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void HashedDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
if (!sparse)
return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, default_value_extractor);
}
@ -740,58 +565,41 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
}
}
/// Stores a String value for `id`.
/// The bytes are copied into the attribute's string arena and the attribute
/// collection receives a StringRef pointing at that copy, so the stored
/// reference does not depend on the lifetime of `value`.
/// Returns whatever the StringRef implementation returns.
template <>
bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
{
    const auto length = value.size();
    const char * arena_copy = attribute.string_arena->insert(value.data(), length);

    const StringRef stored_value{arena_copy, length};
    return setAttributeValueImpl<StringRef>(attribute, id, stored_value);
}
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
{
switch (attribute.type)
bool result = false;
auto type_call = [&, this](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if (attribute.nullable_set)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
if (!sparse)
if (value.isNull())
{
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
result = attribute.nullable_set->insert(id).second;
return;
}
else
{
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
attribute.nullable_set->erase(id);
}
}
}
throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
@ -810,9 +618,18 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
const auto rows = ext::size(ids);
for (const auto i : ext::range(0, rows))
{
out[i] = attr.find(ids[i]) != nullptr;
query_count.fetch_add(rows, std::memory_order_relaxed);
if (attribute.nullable_set && !out[i])
out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
}
}
/// Key presence check for String attributes: the values are held as StringRef,
/// so the lookup is delegated to the StringRef instantiation.
template <>
void HashedDictionary::has<String>(
    const Attribute & attribute,
    const PaddedPODArray<Key> & ids,
    PaddedPODArray<UInt8> & out) const
{
    has<StringRef>(attribute, ids, out);
}
template <typename T, typename AttrType>
@ -833,50 +650,39 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
}
/// Collects the ids of a String attribute by delegating to the StringRef
/// instantiation, which is the representation used by the collections.
template <>
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
{
    auto ids = getIds<StringRef>(attribute);
    return ids;
}
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{
const auto & attribute = attributes.front();
PaddedPODArray<HashedDictionary::Key> result;
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return getIds<UInt8>(attribute);
case AttributeUnderlyingType::utUInt16:
return getIds<UInt16>(attribute);
case AttributeUnderlyingType::utUInt32:
return getIds<UInt32>(attribute);
case AttributeUnderlyingType::utUInt64:
return getIds<UInt64>(attribute);
case AttributeUnderlyingType::utUInt128:
return getIds<UInt128>(attribute);
case AttributeUnderlyingType::utInt8:
return getIds<Int8>(attribute);
case AttributeUnderlyingType::utInt16:
return getIds<Int16>(attribute);
case AttributeUnderlyingType::utInt32:
return getIds<Int32>(attribute);
case AttributeUnderlyingType::utInt64:
return getIds<Int64>(attribute);
case AttributeUnderlyingType::utFloat32:
return getIds<Float32>(attribute);
case AttributeUnderlyingType::utFloat64:
return getIds<Float64>(attribute);
case AttributeUnderlyingType::utString:
return getIds<StringRef>(attribute);
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
/// TODO: Check if order is satisfied
result = getIds<AttributeType>(attribute);
case AttributeUnderlyingType::utDecimal32:
return getIds<Decimal32>(attribute);
case AttributeUnderlyingType::utDecimal64:
return getIds<Decimal64>(attribute);
case AttributeUnderlyingType::utDecimal128:
return getIds<Decimal128>(attribute);
}
return PaddedPODArray<Key>();
if (attribute.nullable_set)
{
for (const auto& value: *attribute.nullable_set)
result.push_back(value.getKey());
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}

View File

@ -3,15 +3,18 @@
#include <atomic>
#include <memory>
#include <variant>
#include <optional>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Core/Block.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <sparsehash/sparse_hash_map>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
/** This dictionary stores all content in a hash table in memory
* (a separate Key -> Value map for each attribute)
@ -20,7 +23,6 @@
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class HashedDictionary final : public IDictionary
{
@ -66,77 +68,16 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@ -162,9 +103,13 @@ private:
template <typename Value>
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::optional<NullableSet> nullable_set;
std::variant<
UInt8,
UInt16,
@ -180,7 +125,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
CollectionPtrType<UInt8>,
@ -235,14 +180,21 @@ private:
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsAttrImpl(
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
const MapType & attr,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);

View File

@ -10,6 +10,8 @@
#include <common/StringRef.h>
#include "IDictionarySource.h"
#include <Dictionaries/DictionaryStructure.h>
#include <DataTypes/IDataType.h>
#include <Columns/ColumnsNumber.h>
#include <chrono>
#include <memory>
@ -20,15 +22,31 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int TYPE_MISMATCH;
}
struct IDictionaryBase;
using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
struct DictionaryStructure;
class ColumnString;
/** DictionaryKeyType tells IDictionary clients which kind of key
  * a dictionary supports.
  */
enum class DictionaryKeyType
{
    /// Dictionaries that support a single UInt64 key column.
    simple,

    /// Dictionaries that support any combination of key columns.
    complex,

    /// Dictionaries that support a UInt64 key column combined with
    /// a numerically representable range key column.
    range
};
/**
* Base class for Dictionaries implementation.
*/
struct IDictionaryBase : public IExternalLoadable
{
using Key = UInt64;
@ -85,6 +103,33 @@ struct IDictionaryBase : public IExternalLoadable
virtual bool isInjective(const std::string & attribute_name) const = 0;
/** Subclass must provide key type that is supported by dictionary.
* Client will use that key type to provide valid key columns for `getColumn` and `has` functions.
*/
virtual DictionaryKeyType getKeyType() const = 0;
/** Subclass must validate key columns and key types
* and return column representation of dictionary attribute.
*
* Parameter default_values_column must be used to provide default values
* for keys that are not in dictionary. If null pointer is passed,
* then default attribute value must be used.
*/
virtual ColumnPtr getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const = 0;
/** Subclass must validate key columns and key types and return a ColumnUInt8 mask
* that tells, for each row, whether the key is present in the dictionary.
* If the key is in the dictionary, the value of the associated row will be 1, otherwise 0.
*/
virtual ColumnUInt8::Ptr hasKeys(
const Columns & key_columns,
const DataTypes & key_types) const = 0;
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
bool supportUpdates() const override { return true; }
@ -115,7 +160,6 @@ protected:
const String full_name;
};
struct IDictionary : IDictionaryBase
{
IDictionary(const StorageID & dict_id_) : IDictionaryBase(dict_id_) {}
@ -124,8 +168,7 @@ struct IDictionary : IDictionaryBase
virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
virtual void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const = 0;
/// TODO: Rewrite
/// Methods for hierarchy.
virtual void isInVectorVector(
@ -157,14 +200,4 @@ struct IDictionary : IDictionaryBase
}
};
/// Implicit conversions in dictGet functions are disabled:
/// the attribute's type must be exactly the requested one.
/// Throws TYPE_MISMATCH naming the dictionary, the attribute and both types otherwise.
inline void checkAttributeType(
    const IDictionaryBase * dictionary,
    const std::string & attribute_name,
    AttributeUnderlyingType attribute_type,
    AttributeUnderlyingType to)
{
    if (attribute_type == to)
        return;

    throw Exception{
        ErrorCodes::TYPE_MISMATCH,
        "{}: type mismatch: attribute {} has type {}, expected {}",
        dictionary->getDictionaryID().getNameForLogs(),
        attribute_name,
        toString(attribute_type),
        toString(to)};
}
}

View File

@ -8,6 +8,7 @@
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteIntText.h>
#include <Poco/ByteOrder.h>
#include <Common/formatIPv6.h>
@ -16,6 +17,7 @@
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Functions/FunctionHelpers.h>
namespace DB
{
@ -266,167 +268,75 @@ IPAddressDictionary::IPAddressDictionary(
calculateBytesAllocated();
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
ColumnPtr IPAddressDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
const ColumnPtr default_values_column) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void IPAddressDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
validateKeyTypes(key_types); \
\
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPAddressDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
validateKeyTypes(key_types);
ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
auto size = key_columns.front()->size();
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(attribute.null_values);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
key_columns,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
}
void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
validateKeyTypes(key_types);
const auto first_column = key_columns.front();
const auto rows = first_column->size();
auto result = ColumnUInt8::create(rows);
auto& out = result->getData();
if (first_column->isNumeric())
{
uint8_t addrv6_buf[IPV6_BINARY_LENGTH];
@ -451,6 +361,8 @@ void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key
}
query_count.fetch_add(rows, std::memory_order_relaxed);
return result;
}
void IPAddressDictionary::createAttributes()
@ -652,6 +564,13 @@ void IPAddressDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = vec.size();
}
/// Memory accounting for a String attribute.
/// First runs the StringRef accounting for the attribute, then adds the Arena
/// object itself plus the size reported by the attribute's string arena.
template <>
void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    addAttributeSize<StringRef>(attribute);

    const auto arena_footprint = sizeof(Arena) + attribute.string_arena->size();
    bytes_allocated += arena_footprint;
}
void IPAddressDictionary::calculateBytesAllocated()
{
if (auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
@ -669,64 +588,18 @@ void IPAddressDictionary::calculateBytesAllocated()
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
@ -734,65 +607,27 @@ void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field
attribute.maps.emplace<ContainerType<T>>();
}
/// Attribute initialisation for String attributes.
/// Sets the null value (an empty String when `null_value` is Null), creates the StringRef container
/// and allocates the arena object stored in `string_arena`.
template <>
void IPAddressDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
    /// Start from an empty string and overwrite it only when a non-Null value was supplied.
    String null_string;
    if (!null_value.isNull())
        null_string = null_value.get<String>();
    attribute.null_values = std::move(null_string);

    attribute.maps.emplace<ContainerType<StringRef>>();
    attribute.string_arena = std::make_unique<Arena>();
}
IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
switch (type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
attr.maps.emplace<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(type, type_call);
return attr;
}
@ -802,9 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
@ -841,7 +679,7 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
}
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
return;
}
@ -876,13 +714,16 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
mask_column[*found_it] == mask))
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPAddressDictionary::getItemsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
@ -891,7 +732,7 @@ void IPAddressDictionary::getItemsImpl(
if (unlikely(key_columns.size() == 2))
{
getItemsByTwoKeyColumnsImpl<AttributeType, OutputType>(
attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
attribute, key_columns, std::forward<ValueSetter>(set_value), default_value_extractor);
query_count.fetch_add(rows, std::memory_order_relaxed);
return;
}
@ -909,7 +750,7 @@ void IPAddressDictionary::getItemsImpl(
if (found != ipNotFound())
set_value(i, static_cast<OutputType>(vec[*found]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
else
@ -924,7 +765,7 @@ void IPAddressDictionary::getItemsImpl(
if (found != ipNotFound())
set_value(i, static_cast<OutputType>(vec[*found]));
else
set_value(i, get_default(i));
set_value(i, default_value_extractor[i]);
}
}
@ -940,45 +781,24 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
return setAttributeValueImpl<UInt8>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, value.get<Float64>());
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
return setAttributeValueImpl<Decimal32>(attribute, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
if constexpr (std::is_same_v<AttributeType, String>)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
return setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
}
}
else
{
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
const IPAddressDictionary::Attribute & IPAddressDictionary::getAttribute(const std::string & attribute_name) const
@ -1045,7 +865,7 @@ static auto keyViewGetter()
BlockInputStreamPtr IPAddressDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<IPAddressDictionary, UInt64>;
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;

View File

@ -16,6 +16,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -61,91 +62,16 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
const ColumnPtr default_values_column) const override;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -211,17 +137,23 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsByTwoKeyColumnsImpl(
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const T value);

View File

@ -5,6 +5,8 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include <numeric>
@ -92,6 +94,61 @@ bool IPolygonDictionary::isInjective(const std::string &) const
return false;
}
/// Builds a column with the values of attribute `attribute_name` for the points described by `key_columns`.
///
/// attribute_name        - name of the attribute; resolved to an index with getAttributeIndex().
/// result_type           - passed to dict_struct.getAttribute() together with the name.
/// key_columns           - key columns; the row count is taken from the first one.
/// (unnamed DataTypes)   - key types are not inspected by this function.
/// default_values_column - handed to DictionaryDefaultValueExtractor together with the stored null value;
///                         the extractor is what getItemsImpl() consults for rows it cannot resolve.
///
/// The concrete attribute type is selected at runtime by callOnDictionaryAttributeType(), which invokes
/// the generic lambda below with a type tag exposing `AttributeType`.
ColumnPtr IPolygonDictionary::getColumn(
    const std::string & attribute_name,
    const DataTypePtr & result_type,
    const Columns & key_columns,
    const DataTypes &,
    const ColumnPtr default_values_column) const
{
    ColumnPtr result;

    const auto index = getAttributeIndex(attribute_name);
    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);

    auto keys_size = key_columns.front()->size();

    auto type_call = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;
        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;

        const auto & null_value = std::get<AttributeType>(null_values[index]);
        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);

        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);

        if constexpr (std::is_same_v<AttributeType, String>)
        {
            /// Strings are appended one by one to the column obtained from the provider.
            /// (A separate, never-used ColumnString used to be created here; it has been dropped.)
            auto * out = column.get();

            getItemsImpl<String, StringRef>(
                index,
                key_columns,
                [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
                default_value_extractor);
        }
        else
        {
            /// Other attribute types are written by row index into the column's data array.
            auto & out = column->getData();

            getItemsImpl<AttributeType, AttributeType>(
                index,
                key_columns,
                [&](const size_t row, const auto value) { return out[row] = value; },
                default_value_extractor);
        }

        result = std::move(column);
    };

    callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);

    return result;
}
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
{
// TODO: In order for this to work one would first have to support retrieving arrays from dictionaries.
@ -255,8 +312,12 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
return result;
}
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
auto& out = result->getData();
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
{
@ -266,6 +327,8 @@ void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, Pad
}
query_count.fetch_add(row, std::memory_order_relaxed);
return result;
}
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
@ -276,152 +339,12 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
return it->second;
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(null_values[ind]); \
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
const auto & null_value = StringRef{std::get<String>(null_values[ind])};
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const ColumnString * const def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const String & def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
getItemsImpl<String, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPolygonDictionary::getItemsImpl(
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto points = extractPoints(key_columns);
@ -437,7 +360,7 @@ void IPolygonDictionary::getItemsImpl(
id = ids[id];
if (!found)
{
set_value(i, static_cast<OutputType>(get_default(i)));
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
continue;
}
if constexpr (std::is_same<AttributeType, String>::value)

View File

@ -12,6 +12,7 @@
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -78,101 +79,19 @@ public:
bool isInjective(const std::string & attribute_name) const override;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
/** Functions used to retrieve attributes of specific type by key. */
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
* The check is performed for multiple points represented by columns of their x and y coordinates.
* The boolean result is written to out.
*/
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
/** Single coordinate type. */
using Coord = Float32;
/** A two-dimensional point in Euclidean coordinates. */
@ -224,8 +143,12 @@ private:
void appendNullValue(AttributeUnderlyingType type, const Field & value);
/** Helper function for retrieving the value of an attribute by key. */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
std::map<std::string, size_t> attribute_index_by_name;

View File

@ -37,26 +37,6 @@ protected:
Block getBlock(size_t start, size_t length) const override;
private:
template <typename Type>
using DictionaryGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
template <typename Type>
using DictionaryDecimalGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
ColumnPtr getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
template <typename T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
return fillBlock(block_ids, block_start_dates, block_end_dates);
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename AttributeType, typename Getter>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
Getter getter,
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{
if constexpr (IsDecimalNumber<AttributeType>)
{
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
return column;
}
else
{
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
return column_vector;
}
}
template <typename DictionaryType, typename RangeType, typename Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{
auto column_string = ColumnString::create();
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
return column_string;
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename T>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
return column_vector;
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename DictionarySpecialAttributeType, typename T>
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
auto ids_column = columns.back().column;
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
auto date_key = makeDateKey(block_start_dates, block_end_dates);
auto date_column = getColumnFromPODArray(date_key);
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute & attribute = structure.attributes[idx];
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::utUInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::utUInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::utUInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::utUInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::utUInt128:
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
break;
case AttributeUnderlyingType::utInt8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::utInt16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::utInt32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::utInt64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::utFloat32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::utFloat64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::utDecimal32:
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
break;
case AttributeUnderlyingType::utDecimal64:
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
break;
case AttributeUnderlyingType::utDecimal128:
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
break;
case AttributeUnderlyingType::utString:
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
break;
}
#undef GET_COLUMN_FORM_ATTRIBUTE
ColumnPtr column = dictionary->getColumn(
attribute.name,
attribute.type,
{ids_column, date_column},
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
nullptr);
columns.emplace_back(column, attribute.type, attribute.name);
}
}

View File

@ -5,6 +5,8 @@
#include <ext/range.h>
#include "DictionaryFactory.h"
#include "RangeDictionaryBlockInputStream.h"
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypesDecimal.h>
namespace
{
@ -50,6 +52,7 @@ namespace ErrorCodes
extern const int DICTIONARY_IS_EMPTY;
extern const int TYPE_MISMATCH;
extern const int UNSUPPORTED_METHOD;
extern const int NOT_IMPLEMENTED;
}
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
@ -85,66 +88,101 @@ RangeHashedDictionary::RangeHashedDictionary(
calculateBytesAllocated();
}
#define DECLARE_MULTIPLE_GETTER(TYPE) \
void RangeHashedDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<RangeStorageType> & dates, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
getItems<TYPE>(attribute, ids, dates, out); \
}
DECLARE_MULTIPLE_GETTER(UInt8)
DECLARE_MULTIPLE_GETTER(UInt16)
DECLARE_MULTIPLE_GETTER(UInt32)
DECLARE_MULTIPLE_GETTER(UInt64)
DECLARE_MULTIPLE_GETTER(UInt128)
DECLARE_MULTIPLE_GETTER(Int8)
DECLARE_MULTIPLE_GETTER(Int16)
DECLARE_MULTIPLE_GETTER(Int32)
DECLARE_MULTIPLE_GETTER(Int64)
DECLARE_MULTIPLE_GETTER(Float32)
DECLARE_MULTIPLE_GETTER(Float64)
DECLARE_MULTIPLE_GETTER(Decimal32)
DECLARE_MULTIPLE_GETTER(Decimal64)
DECLARE_MULTIPLE_GETTER(Decimal128)
#undef DECLARE_MULTIPLE_GETTER
void RangeHashedDictionary::getString(
ColumnPtr RangeHashedDictionary::getColumn(
const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
const auto & null_value = std::get<String>(attribute.null_values);
ColumnPtr result;
for (const auto i : ext::range(0, ids.size()))
const auto & attribute = getAttribute(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
auto keys_size = key_columns.front()->size();
/// Cast second column to storage type
Columns modified_key_columns = key_columns;
auto range_storage_column = key_columns[1];
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
const auto * it = attr.find(ids[i]);
if (it)
{
const auto date = dates[i];
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
return v.range.contains(date);
});
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
out->insertData(string_ref.data, string_ref.size);
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsImpl<ValueType, ValueType>(
attribute,
modified_key_columns,
[&](const size_t row, const StringRef value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
out->insertData(null_value.data(), null_value.size());
{
auto & out = column->getData();
getItemsImpl<ValueType, ValueType>(
attribute,
modified_key_columns,
[&](const size_t row, const auto value, bool is_null)
{
if (attribute.is_nullable)
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
return result;
}
/// Key-existence checks are not implemented for RangeHashedDictionary.
/// Always throws Exception with code NOT_IMPLEMENTED; both arguments are ignored.
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns &, const DataTypes &) const
{
    /// The format string must contain a `{}` placeholder, otherwise the dictionary
    /// name passed as the formatting argument never appears in the message.
    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
        "Method hasKeys is not supported by dictionary {}", getDictionaryID().getNameForLogs());
}
void RangeHashedDictionary::createAttributes()
{
@ -154,7 +192,7 @@ void RangeHashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical)
throw Exception{ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.",
@ -220,66 +258,27 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
bucket_count = map_ref->getBufferSizeInCells();
}
/// Size accounting for String attributes: counts the StringRef collection via the
/// generic StringRef path and additionally the Arena object plus the arena's reported size.
template <>
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
    const auto arena_bytes = sizeof(Arena) + attribute.string_arena->size();

    addAttributeSize<StringRef>(attribute);
    bytes_allocated += arena_bytes;
}
void RangeHashedDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
addAttributeSize<UInt8>(attribute);
break;
case AttributeUnderlyingType::utUInt16:
addAttributeSize<UInt16>(attribute);
break;
case AttributeUnderlyingType::utUInt32:
addAttributeSize<UInt32>(attribute);
break;
case AttributeUnderlyingType::utUInt64:
addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::utUInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::utInt8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::utInt16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::utInt32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::utInt64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::utFloat32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::utFloat64:
addAttributeSize<Float64>(attribute);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
addAttributeSize<AttributeType>(attribute);
};
case AttributeUnderlyingType::utDecimal32:
addAttributeSize<Decimal32>(attribute);
break;
case AttributeUnderlyingType::utDecimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::utDecimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::utString:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -290,125 +289,80 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
attribute.maps = std::make_unique<Collection<T>>();
}
RangeHashedDictionary::Attribute
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
template <>
void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
Attribute attr{type, {}, {}, {}};
attribute.string_arena = std::make_unique<Arena>();
const String & string = null_value.get<String>();
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
attribute.maps = std::make_unique<Collection<StringRef>>();
}
switch (type)
RangeHashedDictionary::Attribute
RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, {}};
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
createAttributeImpl<UInt8>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt16:
createAttributeImpl<UInt16>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt32:
createAttributeImpl<UInt32>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt64:
createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::utUInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::utInt8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::utInt16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::utInt32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::utInt64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::utFloat64:
createAttributeImpl<Float64>(attr, null_value);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
createAttributeImpl<AttributeType>(attr, null_value);
};
case AttributeUnderlyingType::utDecimal32:
createAttributeImpl<Decimal32>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::utDecimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
attr.maps = std::make_unique<Collection<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr;
}
template <typename OutputType>
void RangeHashedDictionary::getItems(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const
{
if (false) {} // NOLINT
#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
DISPATCH(UInt8)
DISPATCH(UInt16)
DISPATCH(UInt32)
DISPATCH(UInt64)
DISPATCH(UInt128)
DISPATCH(Int8)
DISPATCH(Int16)
DISPATCH(Int32)
DISPATCH(Int64)
DISPATCH(Float32)
DISPATCH(Float64)
DISPATCH(Decimal32)
DISPATCH(Decimal64)
DISPATCH(Decimal128)
#undef DISPATCH
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
}
template <typename AttributeType, typename OutputType>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void RangeHashedDictionary::getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
const auto null_value = std::get<AttributeType>(attribute.null_values);
PaddedPODArray<Key> key_backup_storage;
PaddedPODArray<RangeStorageType> range_backup_storage;
for (const auto i : ext::range(0, ids.size()))
const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
for (const auto row : ext::range(0, ids.size()))
{
const auto it = attr.find(ids[i]);
const auto it = attr.find(ids[row]);
if (it)
{
const auto date = dates[i];
const auto date = dates[row];
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
if (val_it != std::end(ranges_and_values))
{
auto& value = val_it->value;
if (value)
set_value(row, static_cast<OutputType>(*value), false); // NOLINT
else
set_value(row, default_value_extractor[row], true);
}
else
{
set_value(row, default_value_extractor[row], false);
}
}
else
{
out[i] = static_cast<OutputType>(null_value); // NOLINT
set_value(row, default_value_extractor[row], false);
}
}
@ -417,9 +371,32 @@ void RangeHashedDictionary::getItemsImpl(
template <typename T>
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value)
{
auto & map = *std::get<Ptr<T>>(attribute.maps);
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
Value<ValueType> value_to_insert;
if (attribute.is_nullable && value.isNull())
{
value_to_insert = { range, {} };
}
else
{
if constexpr (std::is_same_v<T, String>)
{
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const StringRef string_ref{string_in_arena, string.size()};
value_to_insert = Value<ValueType>{ range, { string_ref }};
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
}
}
const auto it = map.find(id);
if (it)
@ -427,92 +404,28 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
auto & values = it->getMapped();
const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, Value<T>{range, value});
values.insert(insert_it, std::move(value_to_insert));
}
else
map.insert({id, Values<T>{Value<T>{range, value}}});
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
}
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
{
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt16:
setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt32:
setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt64:
setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::utUInt128:
setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
break;
case AttributeUnderlyingType::utInt8:
setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt16:
setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt32:
setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utInt64:
setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::utFloat32:
setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
break;
case AttributeUnderlyingType::utFloat64:
setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
break;
case AttributeUnderlyingType::utDecimal64:
setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
break;
case AttributeUnderlyingType::utDecimal128:
setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
break;
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
};
case AttributeUnderlyingType::utString:
{
auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
const auto & string = value.get<String>();
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
const StringRef string_ref{string_in_arena, string.size()};
auto * it = map.find(id);
if (it)
{
auto & values = it->getMapped();
const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, Value<StringRef>{range, string_ref});
}
else
map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
break;
}
}
callOnDictionaryAttributeType(attribute.type, type_call);
}
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
@ -541,55 +454,18 @@ void RangeHashedDictionary::getIdsAndDates(
{
const auto & attribute = attributes.front();
switch (attribute.type)
auto type_call = [&](const auto &dictionary_attribute_type)
{
case AttributeUnderlyingType::utUInt8:
getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt16:
getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt32:
getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt64:
getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utUInt128:
getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt8:
getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt16:
getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt32:
getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utInt64:
getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utFloat32:
getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utFloat64:
getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utString:
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
break;
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utDecimal32:
getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utDecimal64:
getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::utDecimal128:
getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
break;
}
if constexpr (std::is_same_v<AttributeType, String>)
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
else
getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
template <typename T, typename RangeType>

View File

@ -1,16 +1,18 @@
#pragma once
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <memory>
#include <variant>
#include <optional>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -52,38 +54,18 @@ public:
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
typedef Int64 RangeStorageType;
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
#define DECLARE_MULTIPLE_GETTER(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<RangeStorageType> & dates, \
ResultArrayType<TYPE> & out) const;
DECLARE_MULTIPLE_GETTER(UInt8)
DECLARE_MULTIPLE_GETTER(UInt16)
DECLARE_MULTIPLE_GETTER(UInt32)
DECLARE_MULTIPLE_GETTER(UInt64)
DECLARE_MULTIPLE_GETTER(UInt128)
DECLARE_MULTIPLE_GETTER(Int8)
DECLARE_MULTIPLE_GETTER(Int16)
DECLARE_MULTIPLE_GETTER(Int32)
DECLARE_MULTIPLE_GETTER(Int64)
DECLARE_MULTIPLE_GETTER(Float32)
DECLARE_MULTIPLE_GETTER(Float64)
DECLARE_MULTIPLE_GETTER(Decimal32)
DECLARE_MULTIPLE_GETTER(Decimal64)
DECLARE_MULTIPLE_GETTER(Decimal128)
#undef DECLARE_MULTIPLE_GETTER
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
void getString(
const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const;
using RangeStorageType = Int64;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -101,7 +83,7 @@ private:
struct Value final
{
Range range;
T value;
std::optional<T> value;
};
template <typename T>
@ -111,10 +93,14 @@ private:
template <typename T>
using Ptr = std::unique_ptr<Collection<T>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
public:
AttributeUnderlyingType type;
bool is_nullable;
std::variant<
UInt8,
UInt16,
@ -130,7 +116,7 @@ private:
Decimal128,
Float32,
Float64,
String>
StringRef>
null_values;
std::variant<
Ptr<UInt8>,
@ -162,30 +148,21 @@ private:
void calculateBytesAllocated();
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename OutputType>
void getItems(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const;
template <typename AttributeType, typename OutputType>
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
static void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;

View File

@ -22,7 +22,8 @@
#include <filesystem>
#include <city.h>
#include <fcntl.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
namespace ProfileEvents
{
@ -445,7 +446,7 @@ void SSDCachePartition::flush()
template <typename Out, typename GetDefault>
void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const
{
auto set_value = [&](const size_t index, ReadBuffer & buf)
@ -456,7 +457,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr
if (metadata.expiresAt() > now)
{
if (metadata.isDefault())
out[index] = get_default(index);
out[index] = default_value_extractor[index];
else
{
ignoreFromBufferToAttributeIndex(attribute_index, buf);
@ -939,14 +940,14 @@ SSDCacheStorage::~SSDCacheStorage()
template <typename Out, typename GetDefault>
void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
GetDefault & get_default, std::chrono::system_clock::time_point now) const
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const
{
std::vector<bool> found(ids.size(), false);
{
std::shared_lock lock(rw_lock);
for (const auto & partition : partitions)
partition->getValue<Out>(attribute_index, ids, out, found, get_default, now);
partition->getValue<Out>(attribute_index, ids, out, found, default_value_extractor, now);
}
for (size_t i = 0; i < ids.size(); ++i)
@ -1327,102 +1328,62 @@ SSDCacheDictionary::SSDCacheDictionary(
createAttributes();
}
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
getItemsNumberImpl<TYPE, TYPE>(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
}
ColumnPtr SSDCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes &,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto keys_size = ids.size();
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>( \
index, \
ids, \
out, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
#define DECLARE(TYPE) \
void SSDCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>( \
index, \
ids, \
out, \
[&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
getItemsStringImpl(index, ids, column.get(), default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(index, ids, out, default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
return result;
}
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void SSDCacheDictionary::getItemsNumberImpl(
const size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const size_t attribute_index,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter & default_value_extractor) const
{
const auto now = std::chrono::system_clock::now();
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, get_default, now);
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, default_value_extractor, now);
if (not_found_ids.empty())
return;
@ -1440,42 +1401,17 @@ void SSDCacheDictionary::getItemsNumberImpl(
[&](const size_t id)
{
for (const size_t row : not_found_ids[id])
out[row] = get_default(row);
out[row] = default_value_extractor[row];
},
getLifetime());
}
void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(null_values[index])};
getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; });
}
void SSDCacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); });
}
void SSDCacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; });
}
template <typename DefaultGetter>
void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
ColumnString * out, DefaultGetter && get_default) const
void SSDCacheDictionary::getItemsStringImpl(
const size_t attribute_index,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter & default_value_extractor) const
{
const auto now = std::chrono::system_clock::now();
@ -1494,7 +1430,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
{
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1525,7 +1461,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
const auto & id = ids[row];
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1539,20 +1475,30 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
}
else
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
}
}
}
void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr SSDCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
PaddedPODArray<Key> backup_storage;
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
auto result = ColumnUInt8::create(ext::size(ids));
auto& out = result->getData();
const auto rows = ext::size(ids);
for (const auto row : ext::range(0, rows))
out[row] = false;
const auto now = std::chrono::system_clock::now();
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
storage.has(ids, out, not_found_ids, now);
if (not_found_ids.empty())
return;
return result;
std::vector<Key> required_ids(not_found_ids.size());
std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; });
@ -1571,11 +1517,13 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
out[row] = false;
},
getLifetime());
return result;
}
/// Builds a block input stream over the ids returned by storage.getCachedIds(),
/// exposing the requested `column_names` and using `max_block_size` as passed by the caller.
/// The stream is handed shared_from_this(), i.e. a shared pointer to this dictionary.
BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<SSDCacheDictionary, Key>;
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
}

View File

@ -2,11 +2,15 @@
#if defined(__linux__) || defined(__FreeBSD__)
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <chrono>
#include <list>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <Poco/Logger.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/ArenaWithFreeLists.h>
@ -16,12 +20,11 @@
#include <Core/Block.h>
#include <Dictionaries/BucketCache.h>
#include <IO/HashingWriteBuffer.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>
#include <shared_mutex>
#include <variant>
#include <vector>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -109,7 +112,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const;
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
@ -232,7 +235,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
GetDefault & get_default, std::chrono::system_clock::time_point now) const;
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const;
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map<Key, std::vector<size_t>> & not_found,
@ -351,77 +354,20 @@ public:
std::exception_ptr getLastException() const override { return storage.getLastException(); }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
/// Returns a column holding the values of attribute `attribute_name` for the given keys.
/// `result_type` is used together with the name to look up the attribute in the dictionary structure.
/// `default_values_column` is handed, along with the attribute's null value, to the
/// DictionaryDefaultValueExtractor that supplies per-row defaults.
/// NOTE(review): presumably a null `default_values_column` means "use the null value" — confirm in DictionaryHelpers.h.
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
/// Returns a UInt8 column with one entry per id in the first key column.
/// Entries start out as false; presumably they are set to true for ids the dictionary contains — confirm in the definition.
/// The `key_types` argument is not used by this implementation.
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * def, ColumnString * out)
const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -434,11 +380,17 @@ private:
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void getItemsNumberImpl(
size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
size_t attribute_index,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsStringImpl(size_t attribute_index, const PaddedPODArray<Key> & ids,
ColumnString * out, DefaultGetter && get_default) const;
void getItemsStringImpl(
size_t attribute_index,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter & default_value_extractor) const;
const std::string name;
const DictionaryStructure dict_struct;

View File

@ -9,6 +9,7 @@
#include <Common/ProfilingScopedRWLock.h>
#include <Common/MemorySanitizer.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataTypes/DataTypesDecimal.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <IO/AIO.h>
@ -23,7 +24,7 @@
#include <filesystem>
#include <city.h>
#include <fcntl.h>
#include <Functions/FunctionHelpers.h>
namespace ProfileEvents
{
@ -461,8 +462,12 @@ void SSDComplexKeyCachePartition::flush()
template <typename Out, typename GetDefault>
void SSDComplexKeyCachePartition::getValue(
const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
const size_t attribute_index,
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<Out> & out,
std::vector<bool> & found,
GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const
{
auto set_value = [&](const size_t index, ReadBuffer & buf)
@ -474,7 +479,7 @@ void SSDComplexKeyCachePartition::getValue(
if (metadata.expiresAt() > now)
{
if (metadata.isDefault())
out[index] = get_default(index);
out[index] = default_value_extractor[index];
else
{
ignoreFromBufferToAttributeIndex(attribute_index, buf);
@ -520,7 +525,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index,
getImpl(key_columns, key_types, set_value, found);
}
void SSDComplexKeyCachePartition::has(
void SSDComplexKeyCachePartition::hasKeys(
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::vector<bool> & found, std::chrono::system_clock::time_point now) const
{
@ -1018,7 +1023,7 @@ void SSDComplexKeyCacheStorage::getString(
hit_count.fetch_add(n - count_not_found, std::memory_order_release);
}
void SSDComplexKeyCacheStorage::has(
void SSDComplexKeyCacheStorage::hasKeys(
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const
@ -1031,7 +1036,7 @@ void SSDComplexKeyCacheStorage::has(
{
std::shared_lock lock(rw_lock);
for (const auto & partition : partitions)
partition->has(key_columns, key_types, out, found, now);
partition->hasKeys(key_columns, key_types, out, found, now);
}
size_t count_not_found = 0;
@ -1376,96 +1381,64 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
createAttributes();
}
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
}
ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const
{
ColumnPtr result;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
dict_struct.validateKeyTypes(key_types);
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
#define DECLARE(TYPE) \
void SSDComplexKeyCacheDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto index = getAttributeIndex(attribute_name); \
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
auto keys_size = key_columns.front()->size();
template <typename AttributeType, typename OutputType, typename DefaultGetter>
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
{
auto * out = column.get();
getItemsStringImpl(index, key_columns, key_types, out, default_value_extractor);
}
else
{
auto & out = column->getData();
getItemsNumberImpl<AttributeType, AttributeType>(
index,
key_columns,
key_types,
out,
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
return result;
}
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const
{
assert(dict_struct.key);
assert(key_columns.size() == key_types.size());
@ -1476,7 +1449,7 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
TemporalComplexKeysPool not_found_pool;
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now);
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, default_value_extractor, now);
if (not_found_keys.empty())
return;
@ -1503,54 +1476,17 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
[&](const auto key)
{
for (const size_t row : not_found_keys[key])
out[row] = get_default(row);
out[row] = default_value_extractor[row];
},
getLifetime());
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
const auto null_value = StringRef{std::get<String>(null_values[index])};
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); });
}
void SSDComplexKeyCacheDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
const auto index = getAttributeIndex(attribute_name);
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
}
template <typename DefaultGetter>
void SSDComplexKeyCacheDictionary::getItemsStringImpl(
const size_t attribute_index,
const Columns & key_columns,
const DataTypes & key_types,
ColumnString * out,
DefaultGetter && get_default) const
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
dict_struct.validateKeyTypes(key_types);
@ -1576,7 +1512,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
{
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1619,7 +1555,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
SCOPE_EXIT(tmp_keys_pool.rollback(key));
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
++default_index;
}
@ -1633,26 +1569,31 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
}
else
{
auto to_insert = get_default(row);
auto to_insert = default_value_extractor[row];
out->insertData(to_insert.data, to_insert.size);
}
}
}
void SSDComplexKeyCacheDictionary::has(
const Columns & key_columns,
const DataTypes & key_types,
PaddedPODArray<UInt8> & out) const
ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
dict_struct.validateKeyTypes(key_types);
const auto rows_num = key_columns.front()->size();
auto result = ColumnUInt8::create(rows_num);
auto& out = result->getData();
for (const auto row : ext::range(0, rows_num))
out[row] = false;
const auto now = std::chrono::system_clock::now();
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
TemporalComplexKeysPool not_found_pool;
storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
storage.hasKeys(key_columns, key_types, out, not_found_keys, not_found_pool, now);
if (not_found_keys.empty())
return;
return result;
std::vector<KeyRef> required_keys(not_found_keys.size());
std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; });
@ -1681,6 +1622,8 @@ void SSDComplexKeyCacheDictionary::has(
out[row] = false;
},
getLifetime());
return result;
}
BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream(

View File

@ -2,11 +2,13 @@
#if defined(OS_LINUX) || defined(__FreeBSD__)
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic>
#include <chrono>
#include <list>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <Poco/Logger.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
@ -19,13 +21,11 @@
#include <Dictionaries/BucketCache.h>
#include <ext/scope_guard.h>
#include <IO/HashingWriteBuffer.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>
#include <shared_mutex>
#include <variant>
#include <vector>
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "DictionaryHelpers.h"
namespace DB
{
@ -313,7 +313,7 @@ public:
template <typename Out, typename GetDefault>
void getValue(const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
std::chrono::system_clock::time_point now) const;
void getString(const size_t attribute_index,
@ -321,7 +321,7 @@ public:
StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
void has(const Columns & key_columns, const DataTypes & key_types,
void hasKeys(const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<UInt8> & out, std::vector<bool> & found,
std::chrono::system_clock::time_point now) const;
@ -459,7 +459,7 @@ public:
TemporalComplexKeysPool & not_found_pool,
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
void hasKeys(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const;
@ -569,88 +569,20 @@ public:
std::exception_ptr getLastException() const override { return storage.getLastException(); }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
/// Returns a column holding the values of attribute `attribute_name` for the given complex keys.
/// `key_types` is validated against the dictionary structure before the lookup.
/// `result_type` is used together with the name to look up the attribute in the dictionary structure.
/// `default_values_column` is handed, along with the attribute's null value, to the
/// DictionaryDefaultValueExtractor that supplies per-row defaults.
/// NOTE(review): presumably a null `default_values_column` means "use the null value" — confirm in DictionaryHelpers.h.
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnPtr default_values_column) const override;
/// Validates `key_types` against the dictionary structure, then returns a UInt8 column
/// with one entry per row of the first key column.
/// Entries start out as false; presumably they are set to true for keys the dictionary contains — confirm in the definition.
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
template <typename T>
using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns,
const DataTypes & key_types, const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
@ -661,17 +593,20 @@ private:
AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
void createAttributes();
template <typename AttributeType, typename OutputType, typename DefaultGetter>
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
void getItemsNumberImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
const Columns & key_columns,
const DataTypes & key_types,
ResultArrayType<OutputType> & out,
DefaultValueExtractor & default_value_extractor) const;
template <typename DefaultGetter>
void getItemsStringImpl(
const size_t attribute_index,
const Columns & key_columns, const DataTypes & key_types,
ColumnString * out, DefaultGetter && get_default) const;
const Columns & key_columns,
const DataTypes & key_types,
ColumnString * out,
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
const std::string name;
const DictionaryStructure dict_struct;

View File

@ -21,7 +21,7 @@
#include "registerDictionaries.h"
#if USE_ODBC
# include <Poco/Data/ODBC/Connector.h>
# include <Poco/Data/ODBC/Connector.h> // Y_IGNORE
#endif
namespace DB

View File

@ -6,31 +6,25 @@ LIBRARY()
PEERDIR(
clickhouse/src/Common
contrib/libs/poco/Data
contrib/libs/poco/Data/ODBC
contrib/libs/poco/MongoDB
contrib/libs/poco/Redis
contrib/libs/sparsehash
)
IF (USE_ODBC)
PEERDIR(contrib/libs/poco/Data/ODBC)
ENDIF ()
NO_COMPILER_WARNINGS()
SRCS(
CacheDictionary.cpp
CacheDictionary_generate1.cpp
CacheDictionary_generate2.cpp
CacheDictionary_generate3.cpp
CassandraBlockInputStream.cpp
CassandraDictionarySource.cpp
CassandraHelpers.cpp
ClickHouseDictionarySource.cpp
ComplexKeyCacheDictionary.cpp
ComplexKeyCacheDictionary_createAttributeWithType.cpp
ComplexKeyCacheDictionary_generate1.cpp
ComplexKeyCacheDictionary_generate2.cpp
ComplexKeyCacheDictionary_generate3.cpp
ComplexKeyCacheDictionary_setAttributeValue.cpp
ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
ComplexKeyDirectDictionary.cpp
ComplexKeyHashedDictionary.cpp
DictionaryBlockInputStreamBase.cpp

View File

@ -5,12 +5,15 @@ LIBRARY()
PEERDIR(
clickhouse/src/Common
contrib/libs/poco/Data
contrib/libs/poco/Data/ODBC
contrib/libs/poco/MongoDB
contrib/libs/poco/Redis
contrib/libs/sparsehash
)
IF (USE_ODBC)
PEERDIR(contrib/libs/poco/Data/ODBC)
ENDIF ()
NO_COMPILER_WARNINGS()

View File

@ -146,20 +146,32 @@ Block MySQLBlockInputStream::readImpl()
const auto value = row[position_mapping[index]];
const auto & sample = description.sample_block.getByPosition(index);
bool is_type_nullable = description.types[index].second;
if (!value.isNull())
{
if (description.types[index].second)
if (is_type_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
column_nullable.getNullMapData().emplace_back(0);
column_nullable.getNullMapData().emplace_back(false);
}
else
{
insertValue(*sample.type, *columns[index], description.types[index].first, value);
}
}
else
{
insertDefaultValue(*columns[index], *sample.column);
if (is_type_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
column_nullable.getNullMapData().back() = true;
}
}
}
++num_rows;

View File

@ -504,7 +504,7 @@ private:
using namespace traits_;
using namespace impl_;
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmetic : public IFunction
{
static constexpr const bool is_plus = IsOperation<Op>::plus;
@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
>(type, std::forward<F>(f));
}
/// Tries to resolve `type` to one of the concrete data types listed below and forwards `f`
/// to castTypeToEither for the dispatch. The list covers integer, date/datetime, decimal and
/// fixed-string types only: no floating-point data type is a candidate.
/// Used by castBothTypes when valid_on_float_arguments is false.
/// Returns whatever castTypeToEither returns (presumably false when no listed type matches — confirm).
template <typename F>
static bool castTypeNoFloats(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeUInt8,
DataTypeUInt16,
DataTypeUInt32,
DataTypeUInt64,
DataTypeUInt256,
DataTypeInt8,
DataTypeInt16,
DataTypeInt32,
DataTypeInt64,
DataTypeInt128,
DataTypeInt256,
DataTypeDate,
DataTypeDateTime,
DataTypeDecimal<Decimal32>,
DataTypeDecimal<Decimal64>,
DataTypeDecimal<Decimal128>,
DataTypeDecimal<Decimal256>,
DataTypeFixedString
>(type, std::forward<F>(f));
}
/// Resolves the concrete data types of both `left` and `right` and calls `f(left_, right_)` with them.
/// The candidate type list is chosen at compile time from the valid_on_float_arguments template flag:
/// castType is used for both sides when it is true, castTypeNoFloats for both sides otherwise.
template <typename F>
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
{
return castType(left, [&](const auto & left_)
if constexpr (valid_on_float_arguments)
{
return castType(right, [&](const auto & right_)
return castType(left, [&](const auto & left_)
{
return f(left_, right_);
return castType(right, [&](const auto & right_)
{
return f(left_, right_);
});
});
});
}
else
{
return castTypeNoFloats(left, [&](const auto & left_)
{
return castTypeNoFloats(right, [&](const auto & right_)
{
return f(left_, right_);
});
});
}
}
static FunctionOverloadResolverPtr
@ -1319,11 +1357,11 @@ public:
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
{
public:
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
using Monotonicity = typename Base::Monotonicity;
static FunctionPtr create(
@ -1488,7 +1526,7 @@ private:
DataTypePtr return_type;
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
{
public:
@ -1512,14 +1550,14 @@ public:
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
{
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
arguments[0], arguments[1], return_type, context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
@ -1530,7 +1568,7 @@ public:
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
}
private:

View File

@ -212,4 +212,12 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
return {nested_columns, offsets->data()};
}
/// Returns true when both data types report the same name via getName().
/// The comparison is purely textual: nothing but the two names is inspected.
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
{
    return lhs->getName() == rhs->getName();
}
}

View File

@ -152,4 +152,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/// Check if two types are equal
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs);
}

View File

@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
factory.registerFunction<FunctionDictGetUUIDOrDefault>();
factory.registerFunction<FunctionDictGetStringOrDefault>();
factory.registerFunction<FunctionDictGetNoType>();
factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
}
}

File diff suppressed because it is too large Load Diff

View File

@ -101,7 +101,8 @@ struct IntegerRoundingComputation
return scale;
}
static ALWAYS_INLINE T computeImpl(T x, T scale)
/// Integer overflow is Ok.
static ALWAYS_INLINE_NO_SANITIZE_UNDEFINED T computeImpl(T x, T scale)
{
switch (rounding_mode)
{

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
}
template <class T>
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
template <typename A, typename B, typename Impl, typename Name>
struct GCDLCMImpl
@ -33,7 +33,7 @@ struct GCDLCMImpl
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
apply(A, B)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
}
template <typename Result = ResultType>

View File

@ -353,6 +353,9 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
keys_bytes += key_sizes[j];
}
if (keys_bytes > 16)
return false;
executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
return true;
}

View File

@ -37,7 +37,7 @@ struct BitAndImpl
};
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitOrImpl
};
struct NameBitOr { static constexpr auto name = "bitOr"; };
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
}

View File

@ -43,7 +43,7 @@ struct BitRotateLeftImpl
};
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitRotateRightImpl
};
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftLeftImpl
};
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftRightImpl
};
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
}

View File

@ -34,7 +34,7 @@ struct BitTestImpl
};
struct NameBitTest { static constexpr auto name = "bitTest"; };
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitXorImpl
};
struct NameBitXor { static constexpr auto name = "bitXor"; };
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
}

View File

@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
}
};
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
}

View File

@ -95,7 +95,7 @@ void geodistInit()
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
sphere_metric_lut[i] = cosf(latitude);
sphere_metric_lut[i] = sqrf(cosf(latitude));
}
}
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
/// (Remember how a plane flies from Moscow to New York)
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".

View File

@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
}
};
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
}

View File

@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
boost::algorithm::to_lower(matched_region);
region = matched_region;
}
else
{
region = Aws::Region::AWS_GLOBAL;
}
}
}

View File

@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
return false;
}
bool ActionsDAG::empty() const
bool ActionsDAG::trivial() const
{
for (const auto & node : nodes)
if (node.type != ActionType::INPUT)
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
return false;
return true;

View File

@ -223,7 +223,7 @@ public:
bool hasArrayJoin() const;
bool hasStatefulFunctions() const;
bool empty() const; /// If actions only contain inputs.
bool trivial() const; /// If actions has no functions or array join.
const ActionsSettings & getSettings() const { return settings; }

View File

@ -50,7 +50,6 @@
#include <Interpreters/SystemLog.h>
#include <Interpreters/Context.h>
#include <Interpreters/DDLWorker.h>
#include <Common/DNSResolver.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/UncompressedCache.h>
#include <Parsers/ASTCreateQuery.h>

View File

@ -51,7 +51,6 @@
#include <Interpreters/Context.h>
#include <Common/ProfileEvents.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Common/SensitiveDataMasker.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>

View File

@ -0,0 +1,56 @@
#pragma once
#include <Processors/QueryPlan/QueryPlan.h>
#include <array>
namespace DB
{
namespace QueryPlanOptimizations
{
/// This is the main function which optimizes the whole QueryPlan tree.
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Optimization is a function applied to QueryPlan::Node.
/// It can read and update subtree of specified node.
/// It returns the number of updated layers of subtree if some change happened.
/// It must guarantee that the structure of tree is correct.
///
/// New nodes should be added to QueryPlan::Nodes list.
/// It is not needed to remove old nodes from the list.
/// Descriptor of a single query plan optimization: the function to run and a name for it.
struct Optimization
{
    /// Signature shared by all optimizations: receives the node to inspect and the list
    /// that owns plan nodes, returns a size_t (0 when nothing was changed).
    using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);

    /// Function implementing the optimization.
    const Function apply = nullptr;

    /// Name of the optimization. Defaults to an empty string so that a
    /// default-constructed descriptor never carries an indeterminate pointer.
    const char * name = "";
};
/// Move ARRAY JOIN up if possible.
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
/// Move LimitStep down if possible.
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
/// Replace chain `ExpressionStep -> ExpressionStep` with a single ExpressionStep.
/// Replace chain `FilterStep -> ExpressionStep` with a single FilterStep.
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
inline const auto & getOptimizations()
{
static const std::array<Optimization, 4> optimizations =
{{
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
{tryPushDownLimit, "pushDownLimit"},
{trySplitFilter, "splitFilter"},
{tryMergeExpressions, "mergeExpressions"},
}};
return optimizations;
}
}
}

View File

@ -0,0 +1,85 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoinAction.h>
namespace DB::QueryPlanOptimizations
{
/// Tries to move actions of an Expression/Filter step below the ArrayJoin step that is its only child.
/// Returns 0 if nothing was changed, 2 if the two steps were swapped,
/// and 3 if a new Expression node was inserted under ArrayJoin (the parent step is rebuilt in that case).
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
/// Only a node with exactly one child is considered.
if (parent_node->children.size() != 1)
return 0;
QueryPlan::Node * child_node = parent_node->children.front();
auto & parent = parent_node->step;
auto & child = child_node->step;
/// Raw pointers to the step objects themselves. They keep pointing to the same objects
/// after `parent` and `child` are swapped below, because only the owning pointers are exchanged.
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
/// The pattern is (Expression or Filter) directly above ArrayJoin.
if (!(expression_step || filter_step) || !array_join_step)
return 0;
const auto & array_join = array_join_step->arrayJoin();
const auto & expression = expression_step ? expression_step->getExpression()
: filter_step->getExpression();
/// `first` is used below as the part placed before ARRAY JOIN, `second` as the part kept after it.
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
/// No actions can be moved before ARRAY JOIN.
if (split_actions.first->trivial())
return 0;
/// Taken by value (`auto`), because the step in `parent` is replaced below.
auto description = parent->getStepDescription();
/// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
if (split_actions.second->trivial())
{
/// Remember the header the plan above expects; it is passed to ArrayJoin once it becomes the upper step.
auto expected_header = parent->getOutputStream().header;
/// Expression/Filter -> ArrayJoin
std::swap(parent, child);
/// ArrayJoin -> Expression/Filter
/// The lower step is rebuilt on top of the grandchild's output stream.
/// In the Filter branch `filter_step` is read while the constructor arguments are evaluated,
/// i.e. before the assignment releases the old step.
if (expression_step)
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
else
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
child->setStepDescription(std::move(description));
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
return 2;
}
/// Add new expression step before ARRAY JOIN.
/// Expression/Filter -> ArrayJoin -> Something
auto & node = nodes.emplace_back();
/// The new node takes over ArrayJoin's children and becomes ArrayJoin's only child.
node.children.swap(child_node->children);
child_node->children.emplace_back(&node);
/// Expression/Filter -> ArrayJoin -> node -> Something
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
node.step->setStepDescription(description);
array_join_step->updateInputStream(node.step->getOutputStream(), {});
/// Rebuild the upper step from the remaining actions on top of the updated ArrayJoin output.
if (expression_step)
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
else
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
parent->setStepDescription(description + " [split]");
return 3;
}
}

View File

@ -0,0 +1,114 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/FinishSortingStep.h>
#include <Processors/QueryPlan/MergeSortingStep.h>
#include <Processors/QueryPlan/PartialSortingStep.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
{
/// If the step of `node` is one of the sorting steps, pass `limit` to it.
/// For MergingSorted and MergeSorting the same is then attempted for the first child,
/// because we often have chain PartialSorting -> MergeSorting -> MergingSorted.
/// Returns true if the step of `node` itself was updated; a zero limit updates nothing.
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
{
    if (limit == 0)
        return false;

    auto * current_step = node->step.get();
    bool continue_with_child = false;

    if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(current_step))
    {
        /// TODO: remove LimitStep here.
        merging_sorted->updateLimit(limit);
        continue_with_child = true;
    }
    else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(current_step))
    {
        /// TODO: remove LimitStep here.
        finish_sorting->updateLimit(limit);
    }
    else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(current_step))
    {
        merge_sorting->updateLimit(limit);
        continue_with_child = true;
    }
    else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(current_step))
    {
        partial_sorting->updateLimit(limit);
    }
    else
    {
        /// Not a sorting step: nothing to update.
        return false;
    }

    /// Try to update the limit further down the chain; the result of that attempt is ignored.
    if (continue_with_child)
    {
        if (QueryPlan::Node * next = node->children.front())
            tryUpdateLimitForSortingSteps(next, limit);
    }

    return true;
}
/// Tries to move a LimitStep below its only child.
/// Returns 2 if the steps of the node and its child were swapped, otherwise 0.
/// Note that 0 is also returned when only the limits of the sorting steps below were updated.
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    auto & children = parent_node->children;
    if (children.size() != 1)
        return 0;

    QueryPlan::Node * lower_node = children.front();
    auto & upper_step = parent_node->step;
    auto & lower_step = lower_node->step;

    /// The upper step must be a Limit. LIMIT WITH TIES is skipped for now.
    auto * limit_step = typeid_cast<LimitStep *>(upper_step.get());
    if (!limit_step || limit_step->withTies())
        return 0;

    /// Everything which is not a transform is skipped.
    const auto * transforming_step = dynamic_cast<const ITransformingStep *>(lower_step.get());
    if (!transforming_step)
        return 0;

    /// Sorting steps are a special case: they receive the limit, the plan structure stays as is.
    if (tryUpdateLimitForSortingSteps(lower_node, limit_step->getLimitForSorting()))
        return 0;

    /// TotalsHaving is a special case: totals may be incorrect if we push down limit.
    if (typeid_cast<const TotalsHavingStep *>(lower_step.get()))
        return 0;

    /// From here on decide whether pushing down is possible for this particular step.
    const auto & transform_traits = transforming_step->getTransformTraits();
    const auto & stream_traits = transforming_step->getDataStreamTraits();

    /// The child must not change the number of rows.
    if (!transform_traits.preserves_number_of_rows)
        return 0;

    /// The child's output has a sort description, but the child does not preserve sorting:
    /// the data was sorted exactly by the child, so Limit cannot go below it.
    const bool lower_output_has_sorting = !lower_step->getOutputStream().sort_description.empty();
    if (lower_output_has_sorting && !stream_traits.preserves_sorting)
        return 0;

    /// Push down only if no stream properties change.
    /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
    if (!limit_step->getOutputStream().hasEqualPropertiesWith(transforming_step->getOutputStream()))
        return 0;

    /// Limit now reads what the child used to read; after that the two steps change places.
    limit_step->updateInputStream(transforming_step->getInputStreams().front());
    upper_step.swap(lower_step);

    return 2;
}
}

View File

@ -0,0 +1,65 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
namespace DB::QueryPlanOptimizations
{
/// Merges an ExpressionStep with the ExpressionStep or FilterStep directly above it:
///   `ExpressionStep -> ExpressionStep` becomes a single ExpressionStep,
///   `FilterStep -> ExpressionStep` becomes a single FilterStep.
/// Returns 1 if the steps were merged and 0 if the plan was left untouched.
/// The child node is not removed from the node list, only detached from the tree.
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    /// The result is a number, not a flag: use 0 like every other exit of this function.
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    auto & parent = parent_node->step;
    auto & child = child_node->step;

    auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
    auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
    auto * child_expr = typeid_cast<ExpressionStep *>(child.get());

    if (parent_expr && child_expr)
    {
        const auto & child_actions = child_expr->getExpression();
        const auto & parent_actions = parent_expr->getExpression();

        /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
        /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
        /// Such a query will return two zeroes if we combine actions together.
        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
            return 0;

        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));

        /// The merged step reads what the child used to read.
        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
        expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");

        /// Replace the parent step and attach the child's children directly to the parent node.
        parent_node->step = std::move(expr);
        parent_node->children.swap(child_node->children);
        return 1;
    }
    else if (parent_filter && child_expr)
    {
        const auto & child_actions = child_expr->getExpression();
        const auto & parent_actions = parent_filter->getExpression();

        /// Same restriction as above: arrayJoin below and a stateful function above cannot be combined.
        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
            return 0;

        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));

        /// The filter column settings are taken from the parent filter unchanged.
        auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
                                                   parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
        filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");

        parent_node->step = std::move(filter);
        parent_node->children.swap(child_node->children);
        return 1;
    }

    return 0;
}
}

Some files were not shown because too many files have changed in this diff Show More