mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into Add_IStoragePolicy_interface
This commit is contained in:
commit
caef103837
45
.pylintrc
Normal file
45
.pylintrc
Normal file
@ -0,0 +1,45 @@
|
||||
# vim: ft=config
|
||||
|
||||
[BASIC]
|
||||
max-module-lines=2000
|
||||
# due to SQL
|
||||
max-line-length=200
|
||||
# Drop/decrease them one day:
|
||||
max-branches=50
|
||||
max-nested-blocks=10
|
||||
max-statements=200
|
||||
|
||||
[FORMAT]
|
||||
ignore-long-lines = (# )?<?https?://\S+>?$
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
disable = bad-continuation,
|
||||
missing-docstring,
|
||||
bad-whitespace,
|
||||
too-few-public-methods,
|
||||
invalid-name,
|
||||
too-many-arguments,
|
||||
keyword-arg-before-vararg,
|
||||
too-many-locals,
|
||||
too-many-instance-attributes,
|
||||
cell-var-from-loop,
|
||||
fixme,
|
||||
too-many-public-methods,
|
||||
wildcard-import,
|
||||
unused-wildcard-import,
|
||||
singleton-comparison,
|
||||
# pytest.mark.parametrize is not callable (not-callable)
|
||||
not-callable,
|
||||
# https://github.com/PyCQA/pylint/issues/3882
|
||||
# [Python 3.9] Value 'Optional' is unsubscriptable (unsubscriptable-object) (also Union)
|
||||
unsubscriptable-object,
|
||||
# Drop them one day:
|
||||
redefined-outer-name,
|
||||
broad-except,
|
||||
bare-except,
|
||||
no-else-return,
|
||||
global-statement
|
||||
|
||||
[SIMILARITIES]
|
||||
# due to SQL
|
||||
min-similarity-lines=1000
|
@ -851,7 +851,7 @@ public:
|
||||
}
|
||||
|
||||
/// Saturation can occur if 29 Feb is mapped to non-leap year.
|
||||
inline time_t addYears(time_t t, Int64 delta) const
|
||||
inline NO_SANITIZE_UNDEFINED time_t addYears(time_t t, Int64 delta) const
|
||||
{
|
||||
DayNum result_day = addYears(toDayNum(t), delta);
|
||||
|
||||
|
@ -104,8 +104,3 @@ template <> struct is_big_int<wUInt256> { static constexpr bool value = true; };
|
||||
template <typename T>
|
||||
inline constexpr bool is_big_int_v = is_big_int<T>::value;
|
||||
|
||||
template <typename To, typename From>
|
||||
inline To bigint_cast(const From & x [[maybe_unused]])
|
||||
{
|
||||
return static_cast<To>(x);
|
||||
}
|
||||
|
2
contrib/aws
vendored
2
contrib/aws
vendored
@ -1 +1 @@
|
||||
Subproject commit a220591e335923ce1c19bbf9eb925787f7ab6c13
|
||||
Subproject commit 7d48b2c8193679cc4516e5bd68ae4a64b94dae7d
|
2
contrib/cassandra
vendored
2
contrib/cassandra
vendored
@ -1 +1 @@
|
||||
Subproject commit 9cbc1a806df5d40fddbf84533b9873542c6513d8
|
||||
Subproject commit b446d7eb68e6962f431e2b3771313bfe9a2bbd93
|
@ -43,6 +43,7 @@ RUN apt-get update \
|
||||
clang-tidy-${LLVM_VERSION} \
|
||||
cmake \
|
||||
curl \
|
||||
lsof \
|
||||
expect \
|
||||
fakeroot \
|
||||
git \
|
||||
|
@ -21,13 +21,16 @@ function clone
|
||||
|
||||
git init
|
||||
git remote add origin https://github.com/ClickHouse/ClickHouse
|
||||
git fetch --depth=100 origin "$SHA_TO_TEST"
|
||||
git fetch --depth=100 origin master # Used to obtain the list of modified or added tests
|
||||
|
||||
# Network is unreliable. GitHub neither.
|
||||
for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done
|
||||
# Used to obtain the list of modified or added tests
|
||||
for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done
|
||||
|
||||
# If not master, try to fetch pull/.../{head,merge}
|
||||
if [ "$PR_TO_TEST" != "0" ]
|
||||
then
|
||||
git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
|
||||
for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done
|
||||
fi
|
||||
|
||||
git checkout "$SHA_TO_TEST"
|
||||
@ -189,14 +192,14 @@ case "$stage" in
|
||||
echo "failure" > status.txt
|
||||
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt
|
||||
then
|
||||
echo "Lost connection to server. See the logs" > description.txt
|
||||
echo "Lost connection to server. See the logs." > description.txt
|
||||
fi
|
||||
else
|
||||
# Something different -- maybe the fuzzer itself died? Don't grep the
|
||||
# server log in this case, because we will find a message about normal
|
||||
# server termination (Received signal 15), which is confusing.
|
||||
echo "failure" > status.txt
|
||||
echo "Fuzzer failed ($fuzzer_exit_code). See the logs" > description.txt
|
||||
echo "Fuzzer failed ($fuzzer_exit_code). See the logs." > description.txt
|
||||
fi
|
||||
;&
|
||||
"report")
|
||||
|
@ -62,7 +62,7 @@ RUN python3 -m pip install \
|
||||
avro \
|
||||
cassandra-driver \
|
||||
confluent-kafka \
|
||||
dicttoxml \
|
||||
dict2xml \
|
||||
docker \
|
||||
docker-compose==1.22.0 \
|
||||
grpcio \
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import clickhouse_driver
|
||||
|
@ -55,12 +55,11 @@ function run_tests()
|
||||
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
|
||||
fi
|
||||
|
||||
for _ in $(seq 1 "$NUM_TRIES"); do
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
|
||||
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
|
||||
break;
|
||||
fi
|
||||
done
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" --jobs 4 \
|
||||
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
}
|
||||
|
||||
export -f run_tests
|
||||
|
@ -1,7 +1,7 @@
|
||||
# docker build -t yandex/clickhouse-style-test .
|
||||
FROM ubuntu:20.04
|
||||
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip && pip3 install codespell
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell
|
||||
|
||||
|
||||
CMD cd /ClickHouse/utils/check-style && \
|
||||
|
@ -114,6 +114,10 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
|
||||
- `_path` — Path to the file.
|
||||
- `_file` — Name of the file.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
||||
## S3-related settings {#settings}
|
||||
|
||||
The following settings can be set before query execution or placed into configuration file.
|
||||
@ -124,8 +128,29 @@ The following settings can be set before query execution or placed into configur
|
||||
|
||||
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
|
||||
|
||||
**See Also**
|
||||
### Endpoint-based settings {#endpointsettings}
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
|
||||
|
||||
- `endpoint` — Mandatory. Specifies prefix of an endpoint.
|
||||
- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
|
||||
- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
|
||||
- `header` — Optional, can be specified multiple times. Adds specified HTTP header to a request to given endpoint.
|
||||
|
||||
This configuration also applies to S3 disks in `MergeTree` table engine family.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
<s3>
|
||||
<endpoint-name>
|
||||
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
|
||||
<!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
|
||||
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
|
||||
<!-- <use_environment_credentials>false</use_environment_credentials> -->
|
||||
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
|
||||
</endpoint-name>
|
||||
</s3>
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) <!--hide-->
|
||||
|
@ -254,7 +254,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -450,7 +449,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.
|
||||
|
@ -27,6 +27,8 @@ We recommend using SQL-driven workflow. Both of the configuration methods work s
|
||||
!!! note "Warning"
|
||||
You can’t manage the same access entity by both configuration methods simultaneously.
|
||||
|
||||
To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement.
|
||||
|
||||
## Usage {#access-control-usage}
|
||||
|
||||
By default, the ClickHouse server provides the `default` user account which is not allowed using SQL-driven access control and account management but has all the rights and permissions. The `default` user account is used in any cases when the username is not defined, for example, at login from client or in distributed queries. In distributed query processing a default user account is used, if the configuration of the server or cluster doesn’t specify the [user and password](../engines/table-engines/special/distributed.md) properties.
|
||||
|
@ -2489,7 +2489,6 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
||||
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
|
||||
|
||||
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
|
||||
@ -2523,7 +2522,6 @@ With `aggregate_functions_null_for_empty = 1` the result would be:
|
||||
└───────────────┴──────────────┘
|
||||
```
|
||||
|
||||
|
||||
## union_default_mode {#union-default-mode}
|
||||
|
||||
Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
|
||||
@ -2538,7 +2536,6 @@ Default value: `''`.
|
||||
|
||||
See examples in [UNION](../../sql-reference/statements/select/union.md).
|
||||
|
||||
|
||||
## data_type_default_nullable {#data_type_default_nullable}
|
||||
|
||||
Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition to be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
|
||||
@ -2550,7 +2547,6 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
||||
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
|
||||
|
||||
Enables special logic to perform merges on replicas.
|
||||
@ -2570,4 +2566,15 @@ High values for that threshold may lead to replication delays.
|
||||
|
||||
It can be useful when merges are CPU bounded not IO bounded (performing heavy data compression, calculating aggregate functions or default expressions that require a large amount of calculations, or just very high number of tiny merges).
|
||||
|
||||
## max_final_threads {#max-final-threads}
|
||||
|
||||
Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread.
|
||||
|
||||
Default value: `16`.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
@ -7,16 +7,16 @@ Columns:
|
||||
- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID.
|
||||
- `storage` ([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible values: “users.xml” if a quota is configured in the users.xml file, “disk” if a quota is configured by an SQL query.
|
||||
- `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values:
|
||||
- `[]` — All users share the same quota.
|
||||
- `['user_name']` — Connections with the same user name share the same quota.
|
||||
- `['ip_address']` — Connections from the same IP share the same quota.
|
||||
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
|
||||
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`.
|
||||
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`.
|
||||
- `[]` — All users share the same quota.
|
||||
- `['user_name']` — Connections with the same user name share the same quota.
|
||||
- `['ip_address']` — Connections from the same IP share the same quota.
|
||||
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
|
||||
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`.
|
||||
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`.
|
||||
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
|
||||
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
|
||||
- `0` — The quota applies to users specified in the `apply_to_list`.
|
||||
- `1` — The quota applies to all users except those listed in `apply_to_except`.
|
||||
- `0` — The quota applies to users specified in the `apply_to_list`.
|
||||
- `1` — The quota applies to all users except those listed in `apply_to_except`.
|
||||
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
|
||||
- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.
|
||||
|
||||
|
@ -413,4 +413,68 @@ Result:
|
||||
|
||||
- [log(x)](../../sql-reference/functions/math-functions.md#logx-lnx)
|
||||
|
||||
## sign(x) {#signx}
|
||||
|
||||
The `sign` function can extract the sign of a real number.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
sign(x)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Values from `-∞` to `+∞`. Supports all numeric types in ClickHouse.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- -1 for `x < 0`
|
||||
- 0 for `x = 0`
|
||||
- 1 for `x > 0`
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT sign(0);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─sign(0)─┐
|
||||
│ 0 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT sign(1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─sign(1)─┐
|
||||
│ 1 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT sign(-1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─sign(-1)─┐
|
||||
│ -1 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/math_functions/) <!--hide-->
|
||||
|
@ -5,16 +5,35 @@ toc_title: QUOTA
|
||||
|
||||
# ALTER QUOTA {#alter-quota-statement}
|
||||
|
||||
Changes quotas.
|
||||
Changes [quotas](../../../operations/access-rights.md#quotas-management).
|
||||
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
[KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
|
||||
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
|
||||
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
|
||||
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
|
||||
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
|
||||
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
|
||||
NO LIMITS | TRACKING ONLY} [,...]]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
|
||||
|
||||
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
|
||||
|
||||
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Limit the maximum number of queries for the current user to 123 queries per 15-month interval:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
For the default user, limit the maximum execution time to half a second in 30 minutes, and limit the maximum number of queries to 321 and the maximum number of errors to 10 in 5 quarters:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
```
|
||||
|
@ -10,7 +10,7 @@ Changes roles.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
@ -10,8 +10,8 @@ Changes row policy.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER [ROW] POLICY [IF EXISTS] name [ON CLUSTER cluster_name] ON [database.]table
|
||||
[RENAME TO new_name]
|
||||
ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...]
|
||||
[AS {PERMISSIVE | RESTRICTIVE}]
|
||||
[FOR SELECT]
|
||||
[USING {condition | NONE}][,...]
|
||||
|
@ -10,7 +10,7 @@ Changes settings profiles.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] TO name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
|
||||
```
|
||||
|
@ -10,8 +10,8 @@ Changes ClickHouse user accounts.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}]
|
||||
[[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
|
||||
|
@ -11,19 +11,29 @@ Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
[KEYED BY {'none' | 'user name' | 'ip address' | 'forwarded ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
|
||||
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
|
||||
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
|
||||
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
|
||||
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
|
||||
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
|
||||
NO LIMITS | TRACKING ONLY} [,...]]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
|
||||
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
|
||||
|
||||
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
|
||||
|
||||
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
## Example {#create-quota-example}
|
||||
**Examples**
|
||||
|
||||
Limit the maximum number of queries for the current user to 123 queries per 15-month interval:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
|
||||
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
For the default user, limit the maximum execution time to half a second in 30 minutes, and limit the maximum number of queries to 321 and the maximum number of errors to 10 in 5 quarters:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
```
|
||||
|
@ -5,12 +5,12 @@ toc_title: ROLE
|
||||
|
||||
# CREATE ROLE {#create-role-statement}
|
||||
|
||||
Creates a new [role](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
|
||||
Creates new [roles](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
|
||||
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
|
@ -5,16 +5,17 @@ toc_title: ROW POLICY
|
||||
|
||||
# CREATE ROW POLICY {#create-row-policy-statement}
|
||||
|
||||
Creates a [filter for rows](../../../operations/access-rights.md#row-policy-management), which a user can read from a table.
|
||||
Creates [filters for rows](../../../operations/access-rights.md#row-policy-management), which a user can read from a table.
|
||||
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name [ON CLUSTER cluster_name] ON [db.]table
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1
|
||||
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...]
|
||||
[AS {PERMISSIVE | RESTRICTIVE}]
|
||||
[FOR SELECT]
|
||||
[USING condition]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
[TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
|
||||
```
|
||||
|
||||
`ON CLUSTER` clause allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
|
||||
|
@ -5,12 +5,13 @@ toc_title: SETTINGS PROFILE
|
||||
|
||||
# CREATE SETTINGS PROFILE {#create-settings-profile-statement}
|
||||
|
||||
Creates a [settings profile](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role.
|
||||
Creates [settings profiles](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role.
|
||||
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name [ON CLUSTER cluster_name]
|
||||
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
|
@ -45,7 +45,7 @@ Creates a table with the same structure as another table. You can specify a diff
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
|
||||
```
|
||||
|
||||
Creates a table with the structure and data returned by a [table function](../../../sql-reference/table-functions/index.md#table-functions).
|
||||
Creates a table with the same result as that of the [table function](../../../sql-reference/table-functions/index.md#table-functions) specified. The created table will also work in the same way as the corresponding table function that was specified.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
|
||||
|
@ -5,12 +5,13 @@ toc_title: USER
|
||||
|
||||
# CREATE USER {#create-user-statement}
|
||||
|
||||
Creates a [user account](../../../operations/access-rights.md#user-account-management).
|
||||
Creates [user accounts](../../../operations/access-rights.md#user-account-management).
|
||||
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}]
|
||||
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[DEFAULT ROLE role [,...]]
|
||||
@ -69,7 +70,7 @@ CREATE USER john DEFAULT ROLE role1, role2
|
||||
Create the user account `john` and make all his future roles default:
|
||||
|
||||
``` sql
|
||||
ALTER USER user DEFAULT ROLE ALL
|
||||
CREATE USER john DEFAULT ROLE ALL
|
||||
```
|
||||
|
||||
When some role is assigned to `john` in the future, it will become default automatically.
|
||||
@ -77,5 +78,5 @@ When some role is assigned to `john` in the future, it will become default autom
|
||||
Create the user account `john` and make all his future roles default excepting `role1` and `role2`:
|
||||
|
||||
``` sql
|
||||
ALTER USER john DEFAULT ROLE ALL EXCEPT role1, role2
|
||||
CREATE USER john DEFAULT ROLE ALL EXCEPT role1, role2
|
||||
```
|
||||
|
@ -13,7 +13,7 @@ Basic query format:
|
||||
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
|
||||
```
|
||||
|
||||
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
|
||||
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
|
||||
|
||||
For example, consider the table:
|
||||
|
||||
@ -30,7 +30,6 @@ CREATE TABLE insert_select_testtable
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY a
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -55,7 +54,7 @@ SELECT * FROM insert_select_testtable;
|
||||
│ 1 │ a │ 1 │
|
||||
└───┴───┴───┘
|
||||
```
|
||||
|
||||
|
||||
In this example, we see that the second inserted row has `a` and `c` columns filled by the passed values, and `b` filled with value by default.
|
||||
|
||||
If a list of columns doesn't include all existing columns, the rest of the columns are filled with:
|
||||
|
@ -25,6 +25,8 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
|
||||
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines.
|
||||
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables.
|
||||
|
||||
Now `SELECT` queries with `FINAL` are executed in parallel and slightly faster. But there are drawbacks (see below). The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used.
|
||||
|
||||
### Drawbacks {#drawbacks}
|
||||
|
||||
Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
|
||||
|
@ -231,7 +231,7 @@ Shows privileges for a user.
|
||||
### Syntax {#show-grants-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW GRANTS [FOR user]
|
||||
SHOW GRANTS [FOR user1 [, user2 ...]]
|
||||
```
|
||||
|
||||
If user is not specified, the query returns privileges for the current user.
|
||||
@ -245,7 +245,7 @@ Shows parameters that were used at a [user creation](../../sql-reference/stateme
|
||||
### Syntax {#show-create-user-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE USER [name | CURRENT_USER]
|
||||
SHOW CREATE USER [name1 [, name2 ...] | CURRENT_USER]
|
||||
```
|
||||
|
||||
## SHOW CREATE ROLE {#show-create-role-statement}
|
||||
@ -255,7 +255,7 @@ Shows parameters that were used at a [role creation](../../sql-reference/stateme
|
||||
### Syntax {#show-create-role-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE ROLE name
|
||||
SHOW CREATE ROLE name1 [, name2 ...]
|
||||
```
|
||||
|
||||
## SHOW CREATE ROW POLICY {#show-create-row-policy-statement}
|
||||
@ -265,7 +265,7 @@ Shows parameters that were used at a [row policy creation](../../sql-reference/s
|
||||
### Syntax {#show-create-row-policy-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE [ROW] POLICY name ON [database.]table
|
||||
SHOW CREATE [ROW] POLICY name ON [database1.]table1 [, [database2.]table2 ...]
|
||||
```
|
||||
|
||||
## SHOW CREATE QUOTA {#show-create-quota-statement}
|
||||
@ -275,7 +275,7 @@ Shows parameters that were used at a [quota creation](../../sql-reference/statem
|
||||
### Syntax {#show-create-quota-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE QUOTA [name | CURRENT]
|
||||
SHOW CREATE QUOTA [name1 [, name2 ...] | CURRENT]
|
||||
```
|
||||
|
||||
## SHOW CREATE SETTINGS PROFILE {#show-create-settings-profile-statement}
|
||||
@ -285,7 +285,7 @@ Shows parameters that were used at a [settings profile creation](../../sql-refer
|
||||
### Syntax {#show-create-settings-profile-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE [SETTINGS] PROFILE name
|
||||
SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...]
|
||||
```
|
||||
|
||||
## SHOW USERS {#show-users-statement}
|
||||
@ -307,7 +307,6 @@ Returns a list of [roles](../../operations/access-rights.md#role-management). To
|
||||
``` sql
|
||||
SHOW [CURRENT|ENABLED] ROLES
|
||||
```
|
||||
|
||||
## SHOW PROFILES {#show-profiles-statement}
|
||||
|
||||
Returns a list of [setting profiles](../../operations/access-rights.md#settings-profiles-management). To view settings profile parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles).
|
||||
@ -347,7 +346,15 @@ Returns a [quota](../../operations/quotas.md) consumption for all users or for c
|
||||
``` sql
|
||||
SHOW [CURRENT] QUOTA
|
||||
```
|
||||
## SHOW ACCESS {#show-access-statement}
|
||||
|
||||
Shows all [users](../../operations/access-rights.md#user-account-management), [roles](../../operations/access-rights.md#role-management), [profiles](../../operations/access-rights.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges).
|
||||
|
||||
### Syntax {#show-access-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW ACCESS
|
||||
```
|
||||
## SHOW CLUSTER(s) {#show-cluster-statement}
|
||||
|
||||
Returns a list of clusters. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
|
||||
|
@ -256,7 +256,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -452,7 +451,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
Puede ejecutar esas consultas utilizando el modo interactivo de `clickhouse-client` (simplemente ejecútelo en un terminal sin especificar una consulta por adelantado) o probar alguna [interfaz alternativa](../interfaces/index.md) si lo desea.
|
||||
|
@ -256,7 +256,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -452,7 +451,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
Vous pouvez exécuter ces requêtes en utilisant le mode interactif de `clickhouse-client` (lancez-le simplement dans un terminal sans spécifier de requête à l'avance) ou essayer une [interface alternative](../interfaces/index.md) si vous le souhaitez.
|
||||
|
@ -262,7 +262,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -458,7 +457,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
これらのクエリは、`clickhouse-client` の対話型モード(事前にクエリを指定せずにターミナルで起動するだけです)を使って実行するか、[代替インターフェイス](../interfaces/index.md) で実行できます。
|
||||
|
@ -254,7 +254,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -450,7 +449,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.
|
||||
|
@ -28,6 +28,7 @@ ClickHouse поддерживает управление доступом на
|
||||
!!! note "Внимание"
|
||||
Нельзя одновременно использовать оба метода для управления одним и тем же объектом системы доступа.
|
||||
|
||||
Чтобы посмотреть список всех пользователей, ролей, профилей и пр., а также все привилегии, используйте запрос [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement).
|
||||
|
||||
## Использование {#access-control-usage}
|
||||
|
||||
|
@ -2437,4 +2437,15 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
|
||||
|
||||
Эта настройка полезна, когда скорость слияния ограничивается мощностью процессора, а не скоростью операций ввода-вывода (при выполнении "тяжелого" сжатия данных, при расчете агрегатных функций или выражений по умолчанию, требующих большого объема вычислений, или просто при большом количестве мелких слияний).
|
||||
|
||||
## max_final_threads {#max-final-threads}
|
||||
|
||||
Устанавливает максимальное количество параллельных потоков для фазы чтения данных запроса `SELECT` с модификатором [FINAL](../../sql-reference/statements/select/from.md#select-from-final).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 или 1 — настройка отключена. `SELECT` запросы выполняются в один поток.
|
||||
|
||||
Значение по умолчанию: `16`.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
|
||||
|
@ -5,18 +5,38 @@ toc_title: QUOTA
|
||||
|
||||
# ALTER QUOTA {#alter-quota-statement}
|
||||
|
||||
Изменяет квоту.
|
||||
Изменяет [квоту](../../../operations/access-rights.md#quotas-management).
|
||||
|
||||
## Синтаксис {#alter-quota-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
[KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
|
||||
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
|
||||
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
|
||||
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
|
||||
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
|
||||
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
|
||||
NO LIMITS | TRACKING ONLY} [,...]]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/quota/) <!--hide-->
|
||||
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
|
||||
|
||||
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
|
||||
|
||||
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Ограничить для текущего пользователя максимальное число запросов — не более 123 запросов за каждые 15 месяцев:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
Ограничить по умолчанию максимальное время выполнения запроса — не более полусекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/quota/) <!--hide-->
|
||||
|
@ -5,14 +5,14 @@ toc_title: ROLE
|
||||
|
||||
# ALTER ROLE {#alter-role-statement}
|
||||
|
||||
Изменяет роль.
|
||||
Изменяет роли.
|
||||
|
||||
## Синтаксис {#alter-role-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/role/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/role/) <!--hide-->
|
||||
|
@ -7,15 +7,15 @@ toc_title: ROW POLICY
|
||||
|
||||
Изменяет политику доступа к строкам.
|
||||
|
||||
## Синтаксис {#alter-row-policy-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER [ROW] POLICY [IF EXISTS] name [ON CLUSTER cluster_name] ON [database.]table
|
||||
[RENAME TO new_name]
|
||||
ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...]
|
||||
[AS {PERMISSIVE | RESTRICTIVE}]
|
||||
[FOR SELECT]
|
||||
[USING {condition | NONE}][,...]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/row-policy/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/row-policy/) <!--hide-->
|
@ -7,12 +7,12 @@ toc_title: SETTINGS PROFILE
|
||||
|
||||
Изменяет профили настроек.
|
||||
|
||||
## Синтаксис {#alter-settings-profile-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/settings-profile) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/settings-profile) <!--hide-->
|
@ -5,21 +5,19 @@ toc_title: USER
|
||||
|
||||
# ALTER USER {#alter-user-statement}
|
||||
|
||||
Изменяет аккаунт пользователя ClickHouse.
|
||||
Изменяет аккаунты пользователей ClickHouse.
|
||||
|
||||
## Синтаксис {#alter-user-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name]
|
||||
[RENAME TO new_name]
|
||||
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}]
|
||||
[[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
## Описание {#alter-user-dscr}
|
||||
|
||||
Для выполнения `ALTER USER` необходима привилегия [ALTER USER](../grant.md#grant-access-management).
|
||||
|
||||
## Примеры {#alter-user-examples}
|
||||
|
@ -7,23 +7,34 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430"
|
||||
|
||||
Создает [квоту](../../../operations/access-rights.md#quotas-management), которая может быть присвоена пользователю или роли.
|
||||
|
||||
### Синтаксис {#create-quota-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
[KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
|
||||
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
|
||||
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
|
||||
[KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED]
|
||||
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
|
||||
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
|
||||
NO LIMITS | TRACKING ONLY} [,...]]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
|
||||
|
||||
### Пример {#create-quota-example}
|
||||
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
|
||||
|
||||
Ограничить максимальное количество запросов для текущего пользователя до 123 запросов каждые 15 месяцев:
|
||||
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
|
||||
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
Ограничить по умолчанию максимальное время выполнения запроса — не более полусекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/quota)
|
||||
|
@ -5,18 +5,16 @@ toc_title: "\u0420\u043e\u043b\u044c"
|
||||
|
||||
# CREATE ROLE {#create-role-statement}
|
||||
|
||||
Создает [роль](../../../operations/access-rights.md#role-management).
|
||||
Создает [роли](../../../operations/access-rights.md#role-management). Роль — это набор [привилегий](../grant.md#grant-privileges). Пользователь, которому назначена роль, получает все привилегии этой роли.
|
||||
|
||||
### Синтаксис {#create-role-syntax}
|
||||
Синтаксис:
|
||||
|
||||
```sql
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
### Описание {#create-role-description}
|
||||
|
||||
Роль — это набор [привилегий](../grant.md#grant-privileges). Пользователь, которому назначена роль, получает все привилегии этой роли.
|
||||
## Управление ролями {#managing-roles}
|
||||
|
||||
Одному пользователю можно назначить несколько ролей. Пользователи могут применять назначенные роли в произвольных комбинациях с помощью выражения [SET ROLE](../misc.md#set-role-statement). Конечный объем привилегий — это комбинация всех привилегий всех примененных ролей. Если у пользователя имеются привилегии, присвоенные его аккаунту напрямую, они также прибавляются к привилегиям, присвоенным через роли.
|
||||
|
||||
@ -26,7 +24,7 @@ CREATE ROLE [IF NOT EXISTS | OR REPLACE] name
|
||||
|
||||
Для удаления роли используется выражение [DROP ROLE](../misc.md#drop-role-statement). Удаленная роль автоматически отзывается у всех пользователей, которым была назначена.
|
||||
|
||||
### Примеры {#create-role-examples}
|
||||
## Примеры {#create-role-examples}
|
||||
|
||||
```sql
|
||||
CREATE ROLE accountant;
|
||||
|
@ -5,19 +5,22 @@ toc_title: "\u041f\u043e\u043b\u0438\u0442\u0438\u043a\u0430\u0020\u0434\u043e\u
|
||||
|
||||
# CREATE ROW POLICY {#create-row-policy-statement}
|
||||
|
||||
Создает [фильтр для строк](../../../operations/access-rights.md#row-policy-management), которые пользователь может прочесть из таблицы.
|
||||
Создает [фильтры для строк](../../../operations/access-rights.md#row-policy-management), которые пользователь может прочесть из таблицы.
|
||||
|
||||
### Синтаксис {#create-row-policy-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name [ON CLUSTER cluster_name] ON [db.]table
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1
|
||||
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...]
|
||||
[AS {PERMISSIVE | RESTRICTIVE}]
|
||||
[FOR SELECT]
|
||||
[USING condition]
|
||||
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
|
||||
```
|
||||
|
||||
#### Секция AS {#create-row-policy-as}
|
||||
Секция `ON CLUSTER` позволяет создавать фильтры для строк на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
## Секция AS {#create-row-policy-as}
|
||||
|
||||
С помощью данной секции можно создать политику разрешения или ограничения.
|
||||
|
||||
@ -27,16 +30,17 @@ CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name [ON CLUSTER cluster
|
||||
|
||||
Ограничительные политики применяются к строкам, прошедшим фильтр разрешительной политики. Если вы не зададите разрешительные политики, пользователь не сможет обращаться ни к каким строкам из таблицы.
|
||||
|
||||
#### Секция TO {#create-row-policy-to}
|
||||
## Секция TO {#create-row-policy-to}
|
||||
|
||||
В секции `TO` вы можете перечислить как роли, так и пользователей. Например, `CREATE ROW POLICY ... TO accountant, john@localhost`.
|
||||
|
||||
Ключевым словом `ALL` обозначаются все пользователи, включая текущего. Ключевые слова `ALL EXCEPT` позволяют исключить пользователей из списка всех пользователей. Например, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`
|
||||
|
||||
### Примеры
|
||||
## Примеры
|
||||
|
||||
- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost`
|
||||
- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira`
|
||||
`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost`
|
||||
|
||||
`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira`
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/row-policy)
|
||||
<!--hide-->
|
@ -5,16 +5,19 @@ toc_title: "\u041f\u0440\u043e\u0444\u0438\u043b\u044c\u0020\u043d\u0430\u0441\u
|
||||
|
||||
# CREATE SETTINGS PROFILE {#create-settings-profile-statement}
|
||||
|
||||
Создает [профиль настроек](../../../operations/access-rights.md#settings-profiles-management), который может быть присвоен пользователю или роли.
|
||||
Создает [профили настроек](../../../operations/access-rights.md#settings-profiles-management), которые могут быть присвоены пользователю или роли.
|
||||
|
||||
### Синтаксис {#create-settings-profile-syntax}
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
### Пример {#create-settings-profile-syntax}
|
||||
Секция `ON CLUSTER` позволяет создавать профили на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
|
||||
|
||||
## Пример {#create-settings-profile-syntax}
|
||||
|
||||
Создать профиль настроек `max_memory_usage_profile`, который содержит значение и ограничения для настройки `max_memory_usage`. Присвоить профиль пользователю `robin`:
|
||||
|
||||
|
@ -5,19 +5,20 @@ toc_title: "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u
|
||||
|
||||
# CREATE USER {#create-user-statement}
|
||||
|
||||
Создает [аккаунт пользователя](../../../operations/access-rights.md#user-account-management).
|
||||
Создает [аккаунты пользователей](../../../operations/access-rights.md#user-account-management).
|
||||
|
||||
### Синтаксис {#create-user-syntax}
|
||||
Синтаксис:
|
||||
|
||||
```sql
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}]
|
||||
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[DEFAULT ROLE role [,...]]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
#### Идентификация
|
||||
## Идентификация
|
||||
|
||||
Существует несколько способов идентификации пользователя:
|
||||
|
||||
@ -28,7 +29,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
- `IDENTIFIED WITH double_sha1_password BY 'qwerty'`
|
||||
- `IDENTIFIED WITH double_sha1_hash BY 'hash'`
|
||||
|
||||
#### Пользовательский хост
|
||||
## Пользовательский хост
|
||||
|
||||
Пользовательский хост — это хост, с которого можно установить соединение с сервером ClickHouse. Хост задается в секции `HOST` следующими способами:
|
||||
|
||||
@ -49,7 +50,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
|
||||
ClickHouse трактует конструкцию `user_name@'address'` как имя пользователя целиком. То есть технически вы можете создать несколько пользователей с одинаковыми `user_name`, но разными частями конструкции после `@`, но лучше так не делать.
|
||||
|
||||
|
||||
### Примеры {#create-user-examples}
|
||||
## Примеры {#create-user-examples}
|
||||
|
||||
|
||||
Создать аккаунт `mira`, защищенный паролем `qwerty`:
|
||||
@ -69,7 +70,7 @@ CREATE USER john DEFAULT ROLE role1, role2
|
||||
Создать аккаунт `john` и установить ролями по умолчанию все его будущие роли:
|
||||
|
||||
``` sql
|
||||
ALTER USER user DEFAULT ROLE ALL
|
||||
CREATE USER john DEFAULT ROLE ALL
|
||||
```
|
||||
|
||||
Когда роль будет назначена аккаунту `john`, она автоматически станет ролью по умолчанию.
|
||||
@ -77,7 +78,7 @@ ALTER USER user DEFAULT ROLE ALL
|
||||
Создать аккаунт `john` и установить ролями по умолчанию все его будущие роли, кроме `role1` и `role2`:
|
||||
|
||||
``` sql
|
||||
ALTER USER john DEFAULT ROLE ALL EXCEPT role1, role2
|
||||
CREATE USER john DEFAULT ROLE ALL EXCEPT role1, role2
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/user)
|
||||
|
@ -13,7 +13,7 @@ toc_title: INSERT INTO
|
||||
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
|
||||
```
|
||||
|
||||
Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
|
||||
Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
|
||||
|
||||
В качестве примера рассмотрим таблицу:
|
||||
|
||||
@ -30,13 +30,12 @@ SHOW CREATE insert_select_testtable
|
||||
`c` Int8
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY a
|
||||
SETTINGS index_granularity = 8192 │
|
||||
ORDER BY a │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1)
|
||||
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1)
|
||||
```
|
||||
|
||||
Если вы хотите вставить данные во все столбцы, кроме 'b', вам нужно передать столько значений, сколько столбцов вы указали в скобках:
|
||||
|
@ -25,6 +25,8 @@ toc_title: FROM
|
||||
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты исполнения `MergeTree` движков.
|
||||
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), и [MaterializedView](../../../engines/table-engines/special/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`.
|
||||
|
||||
Теперь `SELECT` запросы с `FINAL` выполняются параллельно и, следовательно, немного быстрее. Но имеются серьезные недостатки при их использовании (смотрите ниже). Настройка [max_final_threads](../../../operations/settings/settings.md#max-final-threads) устанавливает максимальное количество потоков.
|
||||
|
||||
### Недостатки {#drawbacks}
|
||||
|
||||
Запросы, которые используют `FINAL`, выполняются немного медленнее, чем аналогичные запросы без него, потому что:
|
||||
|
@ -249,11 +249,9 @@ SHOW GRANTS [FOR user]
|
||||
### Синтаксис {#show-create-user-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE USER [name | CURRENT_USER]
|
||||
SHOW CREATE USER [name1 [, name2 ...] | CURRENT_USER]
|
||||
```
|
||||
|
||||
|
||||
|
||||
## SHOW CREATE ROLE {#show-create-role-statement}
|
||||
|
||||
Выводит параметры, использованные при [создании роли](create/role.md#create-role-statement).
|
||||
@ -261,11 +259,9 @@ SHOW CREATE USER [name | CURRENT_USER]
|
||||
### Синтаксис {#show-create-role-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW CREATE ROLE name
|
||||
SHOW CREATE ROLE name1 [, name2 ...]
|
||||
```
|
||||
|
||||
|
||||
|
||||
## SHOW CREATE ROW POLICY {#show-create-row-policy-statement}
|
||||
|
||||
Выводит параметры, использованные при [создании политики доступа к строкам](create/row-policy.md#create-row-policy-statement).
|
||||
@ -273,10 +269,9 @@ SHOW CREATE ROLE name
|
||||
### Синтаксис {#show-create-row-policy-syntax}
|
||||
|
||||
```sql
|
||||
SHOW CREATE [ROW] POLICY name ON [database.]table
|
||||
SHOW CREATE [ROW] POLICY name ON [database1.]table1 [, [database2.]table2 ...]
|
||||
```
|
||||
|
||||
|
||||
## SHOW CREATE QUOTA {#show-create-quota-statement}
|
||||
|
||||
Выводит параметры, использованные при [создании квоты](create/quota.md#create-quota-statement).
|
||||
@ -284,10 +279,9 @@ SHOW CREATE [ROW] POLICY name ON [database.]table
|
||||
### Синтаксис {#show-create-quota-syntax}
|
||||
|
||||
```sql
|
||||
SHOW CREATE QUOTA [name | CURRENT]
|
||||
SHOW CREATE QUOTA [name1 [, name2 ...] | CURRENT]
|
||||
```
|
||||
|
||||
|
||||
## SHOW CREATE SETTINGS PROFILE {#show-create-settings-profile-statement}
|
||||
|
||||
Выводит параметры, использованные при [создании профиля настроек](create/settings-profile.md#create-settings-profile-statement).
|
||||
@ -295,10 +289,9 @@ SHOW CREATE QUOTA [name | CURRENT]
|
||||
### Синтаксис {#show-create-settings-profile-syntax}
|
||||
|
||||
```sql
|
||||
SHOW CREATE [SETTINGS] PROFILE name
|
||||
SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...]
|
||||
```
|
||||
|
||||
|
||||
## SHOW USERS {#show-users-statement}
|
||||
|
||||
Выводит список [пользовательских аккаунтов](../../operations/access-rights.md#user-account-management). Для просмотра параметров пользовательских аккаунтов, см. системную таблицу [system.users](../../operations/system-tables/users.md#system_tables-users).
|
||||
@ -359,4 +352,14 @@ SHOW QUOTAS
|
||||
SHOW [CURRENT] QUOTA
|
||||
```
|
||||
|
||||
## SHOW ACCESS {#show-access-statement}
|
||||
|
||||
Выводит список всех [пользователей](../../operations/access-rights.md#user-account-management), [ролей](../../operations/access-rights.md#role-management), [профилей](../../operations/access-rights.md#settings-profiles-management) и пр., а также все [привилегии](../../sql-reference/statements/grant.md#grant-privileges).
|
||||
|
||||
### Синтаксис {#show-access-syntax}
|
||||
|
||||
``` sql
|
||||
SHOW ACCESS
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/show/) <!--hide-->
|
||||
|
@ -118,7 +118,7 @@ for (auto & stream : streams)
|
||||
stream.second->finalize();
|
||||
```
|
||||
|
||||
**18.** 行的某尾不应该包含空格。
|
||||
**18.** 行的末尾不应该包含空格。
|
||||
|
||||
**19.** 源文件应该用 UTF-8 编码。
|
||||
|
||||
|
@ -254,7 +254,6 @@ ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
@ -450,7 +449,6 @@ ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
您可以使用`clickhouse-client`的交互模式执行这些查询(只需在终端中启动它,而不需要提前指定查询)。或者如果你愿意,可以尝试一些[替代接口](../interfaces/index.md)。
|
||||
|
@ -25,7 +25,6 @@ CREATE TABLE insert_select_testtable
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY a
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
|
@ -932,6 +932,10 @@ private:
|
||||
std::cerr << "Received exception from server (version "
|
||||
<< server_version << "):" << std::endl << "Code: "
|
||||
<< server_exception->code() << ". " << text << std::endl;
|
||||
if (is_interactive)
|
||||
{
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (client_exception)
|
||||
@ -939,6 +943,10 @@ private:
|
||||
fmt::print(stderr,
|
||||
"Error on processing query '{}':\n{}\n",
|
||||
full_query, client_exception->message());
|
||||
if (is_interactive)
|
||||
{
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// A debug check -- at least some exception must be set, if the error
|
||||
|
@ -229,7 +229,7 @@ public:
|
||||
{
|
||||
for (const auto & x : small)
|
||||
{
|
||||
if (!rb->contains(static_cast<Value>(x.getValue())))
|
||||
if (!r1.rb->contains(static_cast<Value>(x.getValue())))
|
||||
buffer.push_back(x.getValue());
|
||||
}
|
||||
|
||||
|
@ -138,6 +138,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
|
||||
|
||||
void Connection::disconnect()
|
||||
{
|
||||
maybe_compressed_out = nullptr;
|
||||
in = nullptr;
|
||||
last_input_packet_type.reset();
|
||||
out = nullptr; // can write to socket
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Core/BigInt.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
#include <common/sort.h>
|
||||
|
@ -37,33 +37,16 @@ namespace ErrorCodes
|
||||
template <typename T>
|
||||
StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
|
||||
{
|
||||
if constexpr (is_big_int_v<T>)
|
||||
{
|
||||
static constexpr size_t bytesize = BigInt<T>::size;
|
||||
char * pos = arena.allocContinue(bytesize, begin);
|
||||
return BigInt<T>::serialize(data[n], pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto * pos = arena.allocContinue(sizeof(T), begin);
|
||||
unalignedStore<T>(pos, data[n]);
|
||||
return StringRef(pos, sizeof(T));
|
||||
}
|
||||
auto * pos = arena.allocContinue(sizeof(T), begin);
|
||||
unalignedStore<T>(pos, data[n]);
|
||||
return StringRef(pos, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const char * ColumnVector<T>::deserializeAndInsertFromArena(const char * pos)
|
||||
{
|
||||
if constexpr (is_big_int_v<T>)
|
||||
{
|
||||
data.emplace_back(BigInt<T>::deserialize(pos));
|
||||
return pos + BigInt<T>::size;
|
||||
}
|
||||
else
|
||||
{
|
||||
data.emplace_back(unalignedLoad<T>(pos));
|
||||
return pos + sizeof(T);
|
||||
}
|
||||
data.emplace_back(unalignedLoad<T>(pos));
|
||||
return pos + sizeof(T);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -299,18 +282,10 @@ MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
new_col.data.resize(size);
|
||||
|
||||
size_t count = std::min(this->size(), size);
|
||||
if constexpr (is_POD)
|
||||
{
|
||||
memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));
|
||||
memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));
|
||||
|
||||
if (size > count)
|
||||
memset(static_cast<void *>(&new_col.data[count]), static_cast<int>(ValueType()), (size - count) * sizeof(ValueType));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < count; i++)
|
||||
new_col.data[i] = data[i];
|
||||
}
|
||||
if (size > count)
|
||||
memset(static_cast<void *>(&new_col.data[count]), static_cast<int>(ValueType()), (size - count) * sizeof(ValueType));
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -348,15 +323,7 @@ void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
|
||||
|
||||
size_t old_size = data.size();
|
||||
data.resize(old_size + length);
|
||||
if constexpr (is_POD)
|
||||
{
|
||||
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < length; i++)
|
||||
data[old_size + i] = src_vec.data[start + i];
|
||||
}
|
||||
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -372,70 +339,52 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
||||
if (result_size_hint)
|
||||
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
|
||||
|
||||
if constexpr (is_POD)
|
||||
{
|
||||
const UInt8 * filt_pos = filt.data();
|
||||
const UInt8 * filt_end = filt_pos + size;
|
||||
const T * data_pos = data.data();
|
||||
const UInt8 * filt_pos = filt.data();
|
||||
const UInt8 * filt_end = filt_pos + size;
|
||||
const T * data_pos = data.data();
|
||||
|
||||
#ifdef __SSE2__
|
||||
/** A slightly more optimized version.
|
||||
* Based on the assumption that often pieces of consecutive values
|
||||
* completely pass or do not pass the filter.
|
||||
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
|
||||
*/
|
||||
/** A slightly more optimized version.
|
||||
* Based on the assumption that often pieces of consecutive values
|
||||
* completely pass or do not pass the filter.
|
||||
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
|
||||
*/
|
||||
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
/// Nothing is inserted.
|
||||
}
|
||||
else if (0xFFFF == mask)
|
||||
{
|
||||
res_data.insert(data_pos, data_pos + SIMD_BYTES);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < SIMD_BYTES; ++i)
|
||||
if (filt_pos[i])
|
||||
res_data.push_back(data_pos[i]);
|
||||
}
|
||||
|
||||
filt_pos += SIMD_BYTES;
|
||||
data_pos += SIMD_BYTES;
|
||||
/// Nothing is inserted.
|
||||
}
|
||||
else if (0xFFFF == mask)
|
||||
{
|
||||
res_data.insert(data_pos, data_pos + SIMD_BYTES);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < SIMD_BYTES; ++i)
|
||||
if (filt_pos[i])
|
||||
res_data.push_back(data_pos[i]);
|
||||
}
|
||||
|
||||
filt_pos += SIMD_BYTES;
|
||||
data_pos += SIMD_BYTES;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (filt_pos < filt_end)
|
||||
{
|
||||
if (*filt_pos)
|
||||
res_data.push_back(*data_pos);
|
||||
|
||||
++filt_pos;
|
||||
++data_pos;
|
||||
}
|
||||
}
|
||||
else
|
||||
while (filt_pos < filt_end)
|
||||
{
|
||||
const auto * filt_pos = filt.begin();
|
||||
const auto * filt_end = filt.end();
|
||||
auto data_pos = data.begin();
|
||||
if (*filt_pos)
|
||||
res_data.push_back(*data_pos);
|
||||
|
||||
while (filt_pos < filt_end)
|
||||
{
|
||||
if (*filt_pos)
|
||||
res_data.push_back(*data_pos);
|
||||
|
||||
++filt_pos;
|
||||
++data_pos;
|
||||
}
|
||||
++filt_pos;
|
||||
++data_pos;
|
||||
}
|
||||
|
||||
return res;
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Columns/ColumnVectorHelper.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/BigInt.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
|
||||
@ -107,10 +106,7 @@ private:
|
||||
|
||||
public:
|
||||
using ValueType = T;
|
||||
static constexpr bool is_POD = !is_big_int_v<T>;
|
||||
using Container = std::conditional_t<is_POD,
|
||||
PaddedPODArray<ValueType>,
|
||||
std::vector<ValueType>>;
|
||||
using Container = PaddedPODArray<ValueType>;
|
||||
|
||||
private:
|
||||
ColumnVector() {}
|
||||
@ -136,10 +132,7 @@ public:
|
||||
|
||||
void insertData(const char * pos, size_t) override
|
||||
{
|
||||
if constexpr (is_POD)
|
||||
data.emplace_back(unalignedLoad<T>(pos));
|
||||
else
|
||||
data.emplace_back(BigInt<T>::deserialize(pos));
|
||||
data.emplace_back(unalignedLoad<T>(pos));
|
||||
}
|
||||
|
||||
void insertDefault() override
|
||||
@ -149,18 +142,12 @@ public:
|
||||
|
||||
void insertManyDefaults(size_t length) override
|
||||
{
|
||||
if constexpr (is_POD)
|
||||
data.resize_fill(data.size() + length, T());
|
||||
else
|
||||
data.resize(data.size() + length, T());
|
||||
data.resize_fill(data.size() + length, T());
|
||||
}
|
||||
|
||||
void popBack(size_t n) override
|
||||
{
|
||||
if constexpr (is_POD)
|
||||
data.resize_assume_reserved(data.size() - n);
|
||||
else
|
||||
data.resize(data.size() - n);
|
||||
data.resize_assume_reserved(data.size() - n);
|
||||
}
|
||||
|
||||
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
||||
@ -185,16 +172,12 @@ public:
|
||||
|
||||
size_t allocatedBytes() const override
|
||||
{
|
||||
if constexpr (is_POD)
|
||||
return data.allocated_bytes();
|
||||
else
|
||||
return data.capacity() * sizeof(data[0]);
|
||||
return data.allocated_bytes();
|
||||
}
|
||||
|
||||
void protect() override
|
||||
{
|
||||
if constexpr (is_POD)
|
||||
data.protect();
|
||||
data.protect();
|
||||
}
|
||||
|
||||
void insertValue(const T value)
|
||||
|
@ -1,41 +0,0 @@
|
||||
#include <Common/DirectorySyncGuard.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <fcntl.h> // O_RDWR
|
||||
|
||||
/// OSX does not have O_DIRECTORY
|
||||
#ifndef O_DIRECTORY
|
||||
#define O_DIRECTORY O_RDWR
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_FSYNC;
|
||||
}
|
||||
|
||||
/// Opens `path` through `disk_` with the O_DIRECTORY flag and keeps both the disk
/// and the returned descriptor; the destructor later syncs and closes that descriptor.
/// On platforms without O_DIRECTORY this file defines it as O_RDWR.
DirectorySyncGuard::DirectorySyncGuard(const DiskPtr & disk_, const String & path)
|
||||
: disk(disk_)
|
||||
, fd(disk_->open(path, O_DIRECTORY))
|
||||
{}
|
||||
|
||||
/// Syncs the descriptor opened by the constructor via `disk->sync(fd)` and then closes it
/// via `disk->close(fd)`. Everything runs inside try/catch(...): any exception is passed to
/// tryLogCurrentException and is not propagated out of the destructor.
DirectorySyncGuard::~DirectorySyncGuard()
|
||||
{
|
||||
try
|
||||
{
|
||||
/// On Darwin builds an additional fcntl(F_FULLFSYNC) is issued before the regular sync;
/// a non-zero result is reported as CANNOT_FSYNC.
#if defined(OS_DARWIN)
|
||||
if (fcntl(fd, F_FULLFSYNC, 0))
|
||||
throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC);
|
||||
#endif
|
||||
/// NOTE(review): if the fcntl check or disk->sync(fd) throws, control jumps to the
/// catch block and disk->close(fd) below is never reached on that path.
disk->sync(fd);
|
||||
disk->close(fd);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -235,7 +235,7 @@ public:
|
||||
else if constexpr (std::is_same_v<T, UInt128>)
|
||||
throw Exception("No conversion to old UInt128 from " + demangle(typeid(U).name()), ErrorCodes::NOT_IMPLEMENTED);
|
||||
else
|
||||
return bigint_cast<T>(x);
|
||||
return static_cast<T>(x);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Core/BigInt.h>
|
||||
#include <Common/UInt128.h>
|
||||
#include <common/unaligned.h>
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <Core/Defines.h>
|
||||
#include <Core/BigInt.h>
|
||||
|
||||
|
||||
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
|
||||
|
||||
@ -136,23 +136,11 @@ public:
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<std::has_unique_object_representations_v<T>, void> update(const T & x)
|
||||
void update(const T & x)
|
||||
{
|
||||
update(reinterpret_cast<const char *>(&x), sizeof(x));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<(std::is_floating_point_v<T> || std::is_same_v<T, CityHash_v1_0_2::uint128>), void> update(const T & x)
|
||||
{
|
||||
update(reinterpret_cast<const char *>(&x), sizeof(x));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<is_big_int_v<T> && !std::has_unique_object_representations_v<T>, void> update(const T & x)
|
||||
{
|
||||
update(DB::BigInt<T>::serialize(x));
|
||||
}
|
||||
|
||||
void update(const std::string & x)
|
||||
{
|
||||
update(x.data(), x.length());
|
||||
@ -205,27 +193,13 @@ inline UInt64 sipHash64(const char * data, const size_t size)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<std::has_unique_object_representations_v<T>, UInt64> sipHash64(const T & x)
|
||||
UInt64 sipHash64(const T & x)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(x);
|
||||
return hash.get64();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<(std::is_floating_point_v<T> || (is_big_int_v<T> && !std::has_unique_object_representations_v<T>)), UInt64> sipHash64(const T & x)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(x);
|
||||
return hash.get64();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<DB::IsDecimalNumber<T>, UInt64> sipHash64(const T & x)
|
||||
{
|
||||
return sipHash64(x.value);
|
||||
}
|
||||
|
||||
inline UInt64 sipHash64(const std::string & s)
|
||||
{
|
||||
return sipHash64(s.data(), s.size());
|
||||
|
@ -18,30 +18,30 @@ namespace zkutil
|
||||
void TestKeeperStorageDispatcher::processingThread()
|
||||
{
|
||||
setThreadName("TestKeeperSProc");
|
||||
try
|
||||
|
||||
while (!shutdown)
|
||||
{
|
||||
while (!shutdown)
|
||||
RequestInfo info;
|
||||
|
||||
UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
|
||||
|
||||
if (requests_queue.tryPop(info, max_wait))
|
||||
{
|
||||
RequestInfo info;
|
||||
if (shutdown)
|
||||
break;
|
||||
|
||||
UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
|
||||
|
||||
if (requests_queue.tryPop(info, max_wait))
|
||||
try
|
||||
{
|
||||
if (shutdown)
|
||||
break;
|
||||
|
||||
auto responses = storage.processRequest(info.request, info.session_id);
|
||||
for (const auto & response_for_session : responses)
|
||||
setResponse(response_for_session.session_id, response_for_session.response);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
finalize();
|
||||
}
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
|
||||
|
@ -37,7 +37,6 @@ SRCS(
|
||||
CurrentMetrics.cpp
|
||||
CurrentThread.cpp
|
||||
DNSResolver.cpp
|
||||
DirectorySyncGuard.cpp
|
||||
Dwarf.cpp
|
||||
Elf.cpp
|
||||
ErrorCodes.cpp
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
|
||||
|
||||
|
@ -93,7 +93,7 @@ using bool_if_gt_int_vs_uint = std::enable_if_t<is_gt_int_vs_uint<TInt, TUInt>,
|
||||
template <typename TInt, typename TUInt>
|
||||
inline bool_if_gt_int_vs_uint<TInt, TUInt> greaterOpTmpl(TInt a, TUInt b)
|
||||
{
|
||||
return bigint_cast<TInt>(a) > bigint_cast<TInt>(b);
|
||||
return static_cast<TInt>(a) > static_cast<TInt>(b);
|
||||
}
|
||||
|
||||
template <typename TInt, typename TUInt>
|
||||
@ -101,19 +101,19 @@ inline bool_if_gt_int_vs_uint<TInt, TUInt> greaterOpTmpl(TUInt a, TInt b)
|
||||
{
|
||||
using CastA = std::conditional_t<is_big_int_v<TInt> && std::is_same_v<TUInt, DB::UInt128>, DB::UInt256, TInt>;
|
||||
|
||||
return bigint_cast<CastA>(a) > b;
|
||||
return static_cast<CastA>(a) > b;
|
||||
}
|
||||
|
||||
template <typename TInt, typename TUInt>
|
||||
inline bool_if_gt_int_vs_uint<TInt, TUInt> equalsOpTmpl(TInt a, TUInt b)
|
||||
{
|
||||
return bigint_cast<TInt>(a) == bigint_cast<TInt>(b);
|
||||
return static_cast<TInt>(a) == static_cast<TInt>(b);
|
||||
}
|
||||
|
||||
template <typename TInt, typename TUInt>
|
||||
inline bool_if_gt_int_vs_uint<TInt, TUInt> equalsOpTmpl(TUInt a, TInt b)
|
||||
{
|
||||
return bigint_cast<TInt>(a) == bigint_cast<TInt>(b);
|
||||
return static_cast<TInt>(a) == static_cast<TInt>(b);
|
||||
}
|
||||
|
||||
|
||||
@ -196,7 +196,7 @@ inline bool_if_safe_conversion<A, B> greaterOp(A a, B b)
|
||||
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, DB::UInt128>, A, CastB1>;
|
||||
|
||||
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
|
||||
return bigint_cast<CastA>(a) > bigint_cast<CastB>(b);
|
||||
return static_cast<CastA>(a) > static_cast<CastB>(b);
|
||||
else
|
||||
return a > b;
|
||||
}
|
||||
@ -306,7 +306,7 @@ inline bool_if_safe_conversion<A, B> equalsOp(A a, B b)
|
||||
{
|
||||
using LargestType = std::conditional_t<(sizeof(A) > sizeof(B)) || ((sizeof(A) == sizeof(B)) && !std::is_same_v<A, DB::UInt128>), A, B>;
|
||||
|
||||
return bigint_cast<LargestType>(a) == bigint_cast<LargestType>(b);
|
||||
return static_cast<LargestType>(a) == static_cast<LargestType>(b);
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -429,7 +429,7 @@ inline bool_if_safe_conversion<A, B> notEqualsOp(A a, B b)
|
||||
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, DB::UInt128>, A, CastB1>;
|
||||
|
||||
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
|
||||
return bigint_cast<CastA>(a) != bigint_cast<CastB>(b);
|
||||
return static_cast<CastA>(a) != static_cast<CastB>(b);
|
||||
else
|
||||
return a != b;
|
||||
}
|
||||
@ -451,7 +451,7 @@ inline bool_if_safe_conversion<A, B> lessOp(A a, B b)
|
||||
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, DB::UInt128>, A, CastB1>;
|
||||
|
||||
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
|
||||
return bigint_cast<CastA>(a) < bigint_cast<CastB>(b);
|
||||
return static_cast<CastA>(a) < static_cast<CastB>(b);
|
||||
else
|
||||
return a < b;
|
||||
}
|
||||
@ -475,7 +475,7 @@ inline bool_if_safe_conversion<A, B> lessOrEqualsOp(A a, B b)
|
||||
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, DB::UInt128>, A, CastB1>;
|
||||
|
||||
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
|
||||
return bigint_cast<CastA>(a) <= bigint_cast<CastB>(b);
|
||||
return static_cast<CastA>(a) <= static_cast<CastB>(b);
|
||||
else
|
||||
return a <= b;
|
||||
}
|
||||
@ -499,7 +499,7 @@ inline bool_if_safe_conversion<A, B> greaterOrEqualsOp(A a, B b)
|
||||
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, DB::UInt128>, A, CastB1>;
|
||||
|
||||
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
|
||||
return bigint_cast<CastA>(a) >= bigint_cast<CastB>(b);
|
||||
return static_cast<CastA>(a) >= static_cast<CastB>(b);
|
||||
else
|
||||
return a >= b;
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/StringRef.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Serialization helpers for a value type T that occupies exactly 32 bytes
/// (enforced by the static_assert below). All members are static.
template <typename T>
|
||||
struct BigInt
|
||||
{
|
||||
static_assert(sizeof(T) == 32);
|
||||
/// Number of bytes written by serialize() and reported in the returned StringRef.
static constexpr size_t size = 32;
|
||||
|
||||
/// Writes `x` to `pos` with unalignedStore<T> and returns a StringRef over
/// [pos, pos + size). The caller provides the destination buffer.
static StringRef serialize(const T & x, char * pos)
|
||||
{
|
||||
unalignedStore<T>(pos, x);
|
||||
return StringRef(pos, size);
|
||||
}
|
||||
|
||||
/// Convenience overload: allocates a String of `size` zero bytes, fills it
/// through the pointer overload above and returns it by value.
static String serialize(const T & x)
|
||||
{
|
||||
String str(size, '\0');
|
||||
serialize(x, str.data());
|
||||
return str;
|
||||
}
|
||||
|
||||
/// Reads a T back from `pos` with unalignedLoad<T>.
static T deserialize(const char * pos)
|
||||
{
|
||||
return unalignedLoad<T>(pos);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -163,6 +163,7 @@ private:
|
||||
friend class ActionsDAG;
|
||||
};
|
||||
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
using Blocks = std::vector<Block>;
|
||||
using BlocksList = std::list<Block>;
|
||||
using BlocksPtr = std::shared_ptr<Blocks>;
|
||||
|
@ -233,9 +233,9 @@ private:
|
||||
bool overflow = false;
|
||||
|
||||
if constexpr (sizeof(A) > sizeof(CompareInt))
|
||||
overflow |= (bigint_cast<A>(x) != a);
|
||||
overflow |= (static_cast<A>(x) != a);
|
||||
if constexpr (sizeof(B) > sizeof(CompareInt))
|
||||
overflow |= (bigint_cast<B>(y) != b);
|
||||
overflow |= (static_cast<B>(y) != b);
|
||||
if constexpr (is_unsigned_v<A>)
|
||||
overflow |= (x < 0);
|
||||
if constexpr (is_unsigned_v<B>)
|
||||
|
@ -139,6 +139,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
|
||||
\
|
||||
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
|
||||
M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
|
||||
@ -326,6 +327,7 @@ class IColumn;
|
||||
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \
|
||||
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
|
||||
M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \
|
||||
M(String, log_comment, "", "Log comment into system.query_log table and server log. It can be set to arbitrary string no longer than max_query_size.", 0) \
|
||||
M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
|
||||
M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \
|
||||
M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \
|
||||
@ -404,7 +406,7 @@ class IColumn;
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
|
||||
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
|
||||
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
|
||||
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
|
||||
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
|
||||
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
|
||||
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
|
||||
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
|
||||
|
@ -158,7 +158,7 @@ struct Decimal
|
||||
return convertTo<typename U::NativeType>();
|
||||
}
|
||||
else
|
||||
return bigint_cast<U>(value);
|
||||
return static_cast<U>(value);
|
||||
}
|
||||
|
||||
const Decimal<T> & operator += (const T & x) { value += x; return *this; }
|
||||
|
@ -29,8 +29,14 @@ namespace ErrorCodes
|
||||
|
||||
DataTypePtr DataTypeFactory::get(const String & full_name) const
|
||||
{
|
||||
/// Data type parser can be invoked from coroutines with small stack.
|
||||
/// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers)
|
||||
/// let's make the threshold significantly lower.
|
||||
/// It is impractical for user to have complex data types with this depth.
|
||||
static constexpr size_t data_type_max_parse_depth = 200;
|
||||
|
||||
ParserDataType parser;
|
||||
ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth);
|
||||
return get(ast);
|
||||
}
|
||||
|
||||
|
@ -885,15 +885,17 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat
|
||||
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&keys_type))
|
||||
type = nullable_type->getNestedType().get();
|
||||
|
||||
if (isString(type))
|
||||
WhichDataType which(type);
|
||||
|
||||
if (which.isString())
|
||||
return creator(static_cast<ColumnString *>(nullptr));
|
||||
if (isFixedString(type))
|
||||
else if (which.isFixedString())
|
||||
return creator(static_cast<ColumnFixedString *>(nullptr));
|
||||
if (typeid_cast<const DataTypeDate *>(type))
|
||||
else if (which.isDate())
|
||||
return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
|
||||
if (typeid_cast<const DataTypeDateTime *>(type))
|
||||
else if (which.isDateTime())
|
||||
return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
|
||||
if (isColumnedAsNumber(type))
|
||||
else if (which.isInt() || which.isUInt() || which.isFloat())
|
||||
{
|
||||
MutableColumnUniquePtr column;
|
||||
TypeListNativeNumbers::forEach(CreateColumnVector(column, *type, creator));
|
||||
|
@ -31,6 +31,7 @@ public:
|
||||
|
||||
bool canBeUsedInBitOperations() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool canBeInsideLowCardinality() const override { return false; }
|
||||
|
||||
bool canBePromoted() const override { return false; }
|
||||
};
|
||||
|
@ -597,6 +597,7 @@ inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_ty
|
||||
inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); }
|
||||
inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); }
|
||||
inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); }
|
||||
inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type).isMap(); }
|
||||
|
||||
template <typename T>
|
||||
inline bool isUInt8(const T & data_type)
|
||||
|
@ -218,7 +218,7 @@ using ResultOfGreatest = std::conditional_t<LeastGreatestSpecialCase<A, B>,
|
||||
template <typename T>
|
||||
static inline auto littleBits(const T & x)
|
||||
{
|
||||
return bigint_cast<UInt8>(x);
|
||||
return static_cast<UInt8>(x);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -38,10 +38,15 @@ DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support,
|
||||
size_t precision,
|
||||
size_t scale)
|
||||
{
|
||||
// we expect mysql_data_type to be either "basic_type" or "type_with_params(param1, param2, ...)"
|
||||
// Mysql returns mysql_data_type as below:
|
||||
// 1. basic_type
|
||||
// 2. basic_type options
|
||||
// 3. type_with_params(param1, param2, ...)
|
||||
// 4. type_with_params(param1, param2, ...) options
|
||||
// The options can be unsigned, zerofill, or some other strings.
|
||||
auto data_type = std::string_view(mysql_data_type);
|
||||
const auto param_start_pos = data_type.find('(');
|
||||
const auto type_name = data_type.substr(0, param_start_pos);
|
||||
const auto type_end_pos = data_type.find_first_of(R"(( )"); // FIXME: fix style-check script instead
|
||||
const auto type_name = data_type.substr(0, type_end_pos);
|
||||
|
||||
DataTypePtr res;
|
||||
|
||||
|
@ -13,11 +13,13 @@
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include "CacheDictionary.inc.h"
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -130,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
|
||||
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
|
||||
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -249,34 +251,384 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
|
||||
out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr CacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
auto keys_size = keys.size();
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_value)};
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
getItemsString(attribute, keys, column.get(), default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = default_value_extractor[row];
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update asynchronously only if allow_read_expired_keys is enabled.
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there are no cache_not_found ids but some cache_expired ones.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += default_value_extractor[row].size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Combines several callables into one functor: it inherits from every callable
/// and exposes all of their call operators as a single overload set.
template <typename... Callables>
struct Overloaded : Callables...
{
    using Callables::operator()...;
};
|
||||
|
||||
@ -375,8 +727,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
|
||||
return oldest_id;
|
||||
}
|
||||
|
||||
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
/// There are three types of ids.
|
||||
/// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
|
||||
/// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
|
||||
@ -444,7 +802,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
{
|
||||
/// Nothing to update - return;
|
||||
if (!cache_expired_count)
|
||||
return;
|
||||
return result;
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
@ -458,7 +816,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
/// Update is async - no need to wait.
|
||||
return;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,6 +841,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -707,7 +1067,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
|
||||
|
||||
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -119,77 +120,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override;
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -260,12 +204,18 @@ private:
|
||||
/* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
|
||||
Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
PaddedPODArray<Key> getCachedIds() const;
|
||||
|
||||
@ -456,5 +406,6 @@ private:
|
||||
mutable std::condition_variable is_update_finished;
|
||||
|
||||
std::atomic<bool> finished{false};
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,368 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "CacheDictionary.h"
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DictCacheKeysRequested;
|
||||
extern const Event DictCacheKeysRequestedMiss;
|
||||
extern const Event DictCacheKeysRequestedFound;
|
||||
extern const Event DictCacheKeysExpired;
|
||||
extern const Event DictCacheKeysNotFound;
|
||||
extern const Event DictCacheKeysHit;
|
||||
extern const Event DictCacheRequestTimeNs;
|
||||
extern const Event DictCacheRequests;
|
||||
extern const Event DictCacheLockWriteNs;
|
||||
extern const Event DictCacheLockReadNs;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric DictCacheRequests;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
}
|
||||
|
||||
/// Fills `out[row]` for every id in `ids` with the value of a numeric attribute.
/// Every row is first set to `get_default(row)`; rows whose id is found in the cache
/// with a non-default cell are then overwritten from the attribute's storage array.
/// Ids that are missing or expired are collected and requested through the update queue:
/// asynchronously when only expired ids remain and `allow_read_expired_keys` is set,
/// otherwise synchronously, after which the values found by the update are written to `out`.
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = get_default(row);
|
||||
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
/// Per-call statistics, published to ProfileEvents after the cache scan.
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
/// The cache scan below runs while `read_lock` is held; the lock is released at the end of this scope.
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Copies the cached value into the answer unless the cell is marked default
/// (in that case the default written above is kept).
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
/// The expired value is still written to the answer when reading expired keys is allowed.
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
/// Rows counted as not found or expired are excluded from the hit count.
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update asynchronously only if allow_read_expired_keys is enabled.
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
/// The map holds only expired ids here, because cache_not_found_count is zero.
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
/// The update is only enqueued; this path does not wait for it to finish.
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there are no cache_not_found ids but some cache_expired ones.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Keys the update did not find keep the value already written to `out`.
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = get_default(row);
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = get_default(row);
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
|
||||
const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_value); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -10,7 +10,8 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -70,48 +71,50 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
ColumnPtr ComplexKeyCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_values);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
getItemsString(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
|
||||
@ -158,15 +161,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
|
||||
return {oldest_id, false, false};
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows_num);
|
||||
auto& out = result->getData();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
out[row] = false;
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
@ -212,7 +221,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
@ -233,8 +242,395 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
for (const auto out_idx : outdated_keys[key])
|
||||
out[out_idx] = false;
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void ComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = default_value_extractor[row];
|
||||
});
|
||||
}
|
||||
|
||||
/** Appends to `out` the value of a string `attribute` for every row of `key_columns`, in row order.
  *
  * Optimistic path: under the read lock, values are inserted directly while every cell is valid.
  * As soon as one invalid cell is met, the partial output is discarded and the pessimistic path runs:
  * valid values are copied into a temporary arena, invalid keys are requested through update(),
  * and the output column is then built in a single pass over all rows.
  * Rows without a stored value get `default_value_extractor[row]`.
  */
void ComplexKeyCacheDictionary::getItemsString(
    Attribute & attribute,
    const Columns & key_columns,
    ColumnString * out,
    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
    const auto rows_num = key_columns.front()->size();
    /// save on some allocations
    out->getOffsets().reserve(rows_num);

    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// perform optimistic version, fallback to pessimistic if failed
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, discard on fail
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            /// The key is only needed for this lookup, so its arena memory is released on every iteration.
            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                found_outdated_values = true;
                break;
            }
            else
            {
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
        }
    }

    /// optimistic code completed successfully
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows_num, std::memory_order_relaxed);
        hit_count.fetch_add(rows_num, std::memory_order_release);
        return;
    }

    /// now onto the pessimistic one, discard possible partial results from the optimistic path
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;
    /// we are going to store every string separately
    MapType<StringRef> map;
    PODArray<StringRef> keys_array(rows_num);

    size_t total_length = 0;
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];

                /// Defaults are not stored in `map`; the final pass takes them from the extractor.
                if (!cell.isDefault())
                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);

                total_length += string_ref.size + 1;
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count the rows actually served from the cache. `rows_num - outdated_keys.size()` would subtract
    /// the number of distinct outdated keys, so repeated missing keys would be counted as hits.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    /// request new values
    if (!outdated_keys.empty())
    {
        /// One representative row per outdated key is enough to request the key from the source.
        std::vector<size_t> required_rows(outdated_keys.size());
        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
        {
            return pair.getMapped().front();
        });

        update(
            key_columns,
            keys_array,
            required_rows,
            [&](const StringRef key, const size_t cell_idx)
            {
                const StringRef attribute_value = attribute_array[cell_idx];

                /// We must copy key and value to own memory, because it may be replaced with another
                /// in next iterations of inner loop of update.
                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);

                map[copied_key] = copied_value;
                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
            },
            [&](const StringRef key, const size_t)
            {
                for (const auto row : outdated_keys[key])
                    total_length += default_value_extractor[row].size + 1;
            });
    }

    out->getChars().reserve(total_length);

    /// Build the output in row order: a stored value if there is one, the default otherwise.
    for (const auto row : ext::range(0, ext::size(keys_array)))
    {
        const StringRef key = keys_array[row];
        auto * const it = map.find(key);
        const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
        out->insertData(string_ref.data, string_ref.size);
    }
}
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void ComplexKeyCacheDictionary::update(
|
||||
const Columns & in_key_columns,
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::createAttributes()
|
||||
{
|
||||
const auto attributes_size = dict_struct.attributes.size();
|
||||
@ -263,6 +659,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
/// Writes the null value of `attribute` into slot `idx` of its storage array.
/// A string slot is made to reference the null-value string itself; memory previously held by
/// the slot is returned to `string_arena`.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
    auto assign_default = [&](const auto & dictionary_attribute_type)
    {
        using AttributeType = typename std::decay_t<decltype(dictionary_attribute_type)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values);
        }
        else
        {
            const auto & default_string = std::get<String>(attribute.null_values);
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// The slot already references the null value: nothing to free, nothing to assign.
            if (slot.data == default_string.data())
                return;

            if (slot.data)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            slot = StringRef{default_string};
        }
    };

    callOnDictionaryAttributeType(attribute.type, assign_default);
}
|
||||
|
||||
/// Builds an Attribute of the given underlying `type`: stores `null_value` converted to the attribute
/// type and allocates a storage array of `size` elements, accounting for it in `bytes_allocated`.
/// For string attributes the array holds StringRef and `string_arena` is created if it does not exist yet.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute created{type, {}, {}};

    auto init_storage = [&](const auto & dictionary_attribute_type)
    {
        using AttributeType = typename std::decay_t<decltype(dictionary_attribute_type)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            created.null_values = AttributeType(null_value.get<NearestFieldType<AttributeType>>()); /* NOLINT */
            created.arrays = std::make_unique<ContainerType<AttributeType>>(size); /* NOLINT */
            bytes_allocated += size * sizeof(AttributeType);
        }
        else
        {
            created.null_values = null_value.get<String>();
            created.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// The arena is shared by all string attributes, so it is created only once.
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
        }
    };

    callOnDictionaryAttributeType(type, init_storage);

    return created;
}
|
||||
|
||||
/// Stores `value` into slot `idx` of the storage array of `attribute`.
/// A non-empty string is copied into memory taken from `string_arena`; an empty string is stored
/// as an empty StringRef. Memory previously held by the slot is returned to the arena first.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
    auto assign_value = [&](const auto & dictionary_attribute_type)
    {
        using AttributeType = typename std::decay_t<decltype(dictionary_attribute_type)>::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = value.get<NearestFieldType<AttributeType>>();
        }
        else
        {
            const auto & new_string = value.get<String>();
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & default_string = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool slot_owns_memory = slot.data && slot.data != default_string.data();
            if (slot_owns_memory)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            const auto new_size = new_string.size();
            if (new_size == 0)
            {
                slot = {};
                return;
            }

            auto * buffer = string_arena->alloc(new_size);
            std::copy_n(new_string.data(), new_size, buffer);
            slot = StringRef{buffer, new_size};
        }
    };

    callOnDictionaryAttributeType(attribute.type, assign_value);
}
|
||||
|
||||
StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
|
||||
{
|
||||
if (key_size_is_fixed)
|
||||
@ -388,7 +880,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
|
||||
keys.push_back(cells[idx].key);
|
||||
}
|
||||
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -89,93 +89,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
/// In all functions below, key_columns must be full (non-constant) columns.
|
||||
/// See the requirement in IDataType.h for text-serialization functions.
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -252,227 +175,18 @@ private:
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = get_default(row);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
/// save on some allocations
|
||||
out->getOffsets().reserve(rows_num);
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// perform optimistic version, fallback to pessimistic if failed
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, discard on fail
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// optimistic code completed successfully
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num, std::memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
/// now onto the pessimistic one, discard possible partial results from the optimistic path
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
/// we are going to store every string separately
|
||||
MapType<StringRef> map;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
if (!cell.isDefault())
|
||||
map[key] = copyIntoArena(string_ref, temporary_keys_pool);
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
/// request new values
|
||||
if (!outdated_keys.empty())
|
||||
{
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
const StringRef attribute_value = attribute_array[cell_idx];
|
||||
|
||||
/// We must copy key and value to own memory, because it may be replaced with another
|
||||
/// in next iterations of inner loop of update.
|
||||
const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
|
||||
const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
|
||||
|
||||
map[copied_key] = copied_value;
|
||||
total_length += (attribute_value.size + 1) * outdated_keys[key].size();
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
});
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(keys_array)))
|
||||
{
|
||||
const StringRef key = keys_array[row];
|
||||
const auto it = map.find(key);
|
||||
const auto string_ref = it ? it->getMapped() : get_default(row);
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void update(
|
||||
@ -480,152 +194,7 @@ private:
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
AbsentKeyHandler && on_key_not_found) const;
|
||||
|
||||
UInt64 getCellIdx(const StringRef key) const;
|
||||
|
||||
|
@ -1,45 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Builds the Attribute record for an attribute of the given underlying type:
/// stores its null value, allocates a container of `size` elements and adds the
/// container's size to `bytes_allocated`. For string attributes the string arena
/// is additionally created if it does not exist yet.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

/// One switch case per fixed-size type; the null value is converted from the nearest Field type.
#define NUMERIC_CASE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
    { \
        result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
        result.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
        bytes_allocated += size * sizeof(TYPE); \
        break; \
    }

    switch (type)
    {
        NUMERIC_CASE(UInt8)
        NUMERIC_CASE(UInt16)
        NUMERIC_CASE(UInt32)
        NUMERIC_CASE(UInt64)
        NUMERIC_CASE(UInt128)
        NUMERIC_CASE(Int8)
        NUMERIC_CASE(Int16)
        NUMERIC_CASE(Int32)
        NUMERIC_CASE(Int64)
        NUMERIC_CASE(Decimal32)
        NUMERIC_CASE(Decimal64)
        NUMERIC_CASE(Decimal128)
        NUMERIC_CASE(Float32)
        NUMERIC_CASE(Float64)

        case AttributeUnderlyingType::utString:
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// string payloads are stored in an arena that is created on first use
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;
        }
    }

#undef NUMERIC_CASE

    return result;
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Writes `value` into cell `idx` of the attribute's container, dispatching on the
/// attribute's underlying type. Fixed-size values are read from the Field through the
/// Field type listed next to each case; strings are copied into the string arena.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
/// TYPE is the stored element type, FIELD_TYPE is the type requested from the Field.
#define STORE_CASE(TYPE, FIELD_TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
        break;

    switch (attribute.type)
    {
        STORE_CASE(UInt8, UInt64)
        STORE_CASE(UInt16, UInt64)
        STORE_CASE(UInt32, UInt64)
        STORE_CASE(UInt64, UInt64)
        STORE_CASE(UInt128, UInt128)
        STORE_CASE(Int8, Int64)
        STORE_CASE(Int16, Int64)
        STORE_CASE(Int32, Int64)
        STORE_CASE(Int64, Int64)
        STORE_CASE(Float32, Float64)
        STORE_CASE(Float64, Float64)
        STORE_CASE(Decimal32, Decimal32)
        STORE_CASE(Decimal64, Decimal64)
        STORE_CASE(Decimal128, Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & new_value = value.get<String>();
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_value_ref = std::get<String>(attribute.null_values);

            /// release the previous payload, except when the slot points at the null value,
            /// which is not arena memory
            const bool slot_refers_to_null_value = slot.data == null_value_ref.data();
            if (slot.data && !slot_refers_to_null_value)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            const auto length = new_value.size();
            if (length == 0)
            {
                /// empty strings occupy no arena memory
                slot = {};
                break;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_value.data(), new_value.data() + length, buffer);
            slot = StringRef{buffer, length};
            break;
        }
    }

#undef STORE_CASE
}
|
||||
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Resets cell `idx` of the attribute's container to the attribute's null value,
/// dispatching on the attribute's underlying type.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
/// Fixed-size types: copy the stored null value into the cell.
#define RESET_CASE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;

    switch (attribute.type)
    {
        RESET_CASE(UInt8)
        RESET_CASE(UInt16)
        RESET_CASE(UInt32)
        RESET_CASE(UInt64)
        RESET_CASE(UInt128)
        RESET_CASE(Int8)
        RESET_CASE(Int16)
        RESET_CASE(Int32)
        RESET_CASE(Int64)
        RESET_CASE(Float32)
        RESET_CASE(Float64)
        RESET_CASE(Decimal32)
        RESET_CASE(Decimal64)
        RESET_CASE(Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & null_value_ref = std::get<String>(attribute.null_values);
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// nothing to do when the cell already points at the null value
            if (slot.data == null_value_ref.data())
                break;

            /// any other non-null payload was allocated from the arena and must be returned to it
            if (slot.data)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            slot = StringRef{null_value_ref};
            break;
        }
    }

#undef RESET_CASE
}
|
||||
|
||||
}
|
@ -3,6 +3,9 @@
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -31,194 +34,151 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
ColumnPtr ComplexKeyDirectDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return String(null_value.data, null_value.size); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, key_columns, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<String, String>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const String value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
const auto ref = StringRef{value};
|
||||
out->insertData(ref.data, ref.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// For every row of `key_columns`, reports whether the source returned that composite key.
/// Returns a UInt8 column with one entry per input row: 1 if the key was seen in the blocks
/// read back from the source, 0 otherwise.
ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
    dict_struct.validateKeyTypes(key_types);

    const auto rows = key_columns.front()->size();
    const auto keys_size = dict_struct.key->size();

    auto result = ColumnUInt8::create(rows);
    auto & out = result->getData();

    StringRefs key_parts(keys_size);
    MapType<UInt8> has_key;
    Arena requested_keys_pool;
    std::vector<size_t> to_load(rows);
    PODArray<StringRef> keys(rows);

    /// Register every requested key as "not found yet" and ask the source for all rows.
    for (const auto row : ext::range(0, rows))
    {
        const StringRef requested = placeKeysInPool(row, key_columns, key_parts, *dict_struct.key, requested_keys_pool);
        keys[row] = requested;
        has_key[requested] = 0;
        to_load[row] = row;
    }

    auto stream = source_ptr->loadKeys(key_columns, to_load);

    stream->readPrefix();

    while (const auto block = stream->read())
    {
        /// The first `keys_size` columns of each block are read as the key columns.
        const auto loaded_key_columns = ext::map<Columns>(
            ext::range(0, keys_size), [&](const size_t position) { return block.safeGetByPosition(position).column; });

        Arena block_pool;
        StringRefs loaded_parts(keys_size);

        const auto loaded_rows = loaded_key_columns.front()->size();

        for (const auto loaded_row : ext::range(0, loaded_rows))
        {
            const StringRef loaded = placeKeysInPool(loaded_row, loaded_key_columns, loaded_parts, *dict_struct.key, block_pool);

            /// Keys that were not requested are ignored, so the map never grows here.
            if (!has_key.has(loaded))
                continue;

            has_key[loaded] = 1;
        }
    }

    stream->readSuffix();

    for (const auto row : ext::range(0, rows))
        out[row] = has_key[keys[row]];

    query_count.fetch_add(rows, std::memory_order_relaxed);

    return result;
}
|
||||
|
||||
void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
@ -229,7 +189,7 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -237,7 +197,6 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
@ -254,59 +213,19 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
|
||||
}
|
||||
|
||||
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
|
||||
const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{type, {}, {}, attr_name};
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
@ -356,14 +275,18 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<OutputType> value_by_key;
|
||||
HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
@ -372,8 +295,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
|
||||
to_load[row] = row;
|
||||
value_is_null[key] = false;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
@ -392,6 +316,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
if (attribute.name != attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
@ -402,17 +331,15 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
|
||||
if (value_by_key.has(key))
|
||||
{
|
||||
if (attribute.type == AttributeUnderlyingType::utFloat32)
|
||||
{
|
||||
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
|
||||
}
|
||||
auto value = attribute_column[row_idx];
|
||||
|
||||
if (value.isNull())
|
||||
value_is_null[key] = true;
|
||||
else
|
||||
{
|
||||
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
|
||||
}
|
||||
|
||||
value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -422,78 +349,13 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
auto key = keys[row];
|
||||
set_value(row, value_by_key[key], value_is_null[key]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void ComplexKeyDirectDictionary::getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<String> value_by_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
const auto attributes_size = attributes.size();
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
const String from_source = attribute_column[row_idx].template get<String>();
|
||||
value_by_key[key] = from_source;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
@ -503,65 +365,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<UInt8> has_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
has_key[key] = 0;
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (has_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
has_key[key] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
out[row] = has_key[keys[row]];
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
|
||||
{
|
||||
return source_ptr->loadAll();
|
||||
|
@ -12,14 +12,13 @@
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyDirectDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,78 +59,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -142,6 +79,8 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -168,27 +107,21 @@ private:
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
|
||||
|
||||
template <typename Pool>
|
||||
StringRef placeKeysInPool(
|
||||
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
|
||||
@ -197,9 +130,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
const DictionaryLifetime dict_lifetime;
|
||||
|
@ -1,6 +1,10 @@
|
||||
#include "ComplexKeyHashedDictionary.h"
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
@ -32,216 +36,111 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
ColumnPtr ComplexKeyHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, key_columns, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
}
|
||||
has<ValueType>(attribute, key_columns, out);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::createAttributes()
|
||||
@ -252,7 +151,7 @@ void ComplexKeyHashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -407,66 +306,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & map_ref = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
bytes_allocated += sizeof(ContainerType<StringRef>) + map_ref.getBufferSizeInBytes();
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
bytes_allocated += keys_pool.size();
|
||||
@ -479,73 +342,41 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps.emplace<ContainerType<StringRef>>();
|
||||
}
|
||||
|
||||
switch (type)
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
|
||||
{
|
||||
auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
|
||||
|
||||
@ -560,7 +391,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
|
||||
|
||||
const auto it = attr.find(key);
|
||||
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
|
||||
|
||||
if (it)
|
||||
{
|
||||
set_value(i, static_cast<OutputType>(it->getMapped()), false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
|
||||
set_value(i, default_value_extractor[i], true);
|
||||
else
|
||||
set_value(i, default_value_extractor[i], false);
|
||||
}
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
@ -578,51 +420,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
|
||||
return pair.second;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
|
||||
{
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
bool result = false;
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
|
||||
return pair.second;
|
||||
if (value.isNull())
|
||||
{
|
||||
attribute.nullable_set->insert(key);
|
||||
result = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
attribute.nullable_set->erase(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -673,6 +506,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
|
||||
const auto it = attr.find(key);
|
||||
out[i] = static_cast<bool>(it);
|
||||
|
||||
if (attribute.is_nullable && !out[i])
|
||||
out[i] = attribute.nullable_set->find(key) != nullptr;
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
}
|
||||
@ -684,41 +520,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
|
||||
{
|
||||
const Attribute & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return getKeys<UInt8>(attribute);
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return getKeys<UInt16>(attribute);
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return getKeys<UInt32>(attribute);
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return getKeys<UInt64>(attribute);
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return getKeys<UInt128>(attribute);
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return getKeys<Int8>(attribute);
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return getKeys<Int16>(attribute);
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return getKeys<Int32>(attribute);
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return getKeys<Int64>(attribute);
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return getKeys<Float32>(attribute);
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return getKeys<Float64>(attribute);
|
||||
case AttributeUnderlyingType::utString:
|
||||
return getKeys<StringRef>(attribute);
|
||||
std::vector<StringRef> result;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return getKeys<Decimal32>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return getKeys<Decimal64>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return getKeys<Decimal128>(attribute);
|
||||
}
|
||||
return {};
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
result = getKeys<StringRef>(attribute);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = getKeys<AttributeType>(attribute);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -730,12 +551,18 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
|
||||
for (const auto & key : attr)
|
||||
keys.push_back(key.getKey());
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
for (const auto & key: *attribute.nullable_set)
|
||||
keys.push_back(key.getKey());
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
|
||||
}
|
||||
|
||||
|
@ -7,17 +7,17 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Core/Block.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyHashedDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,91 +60,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -152,9 +77,14 @@ private:
|
||||
template <typename Value>
|
||||
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
|
||||
|
||||
using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
std::unique_ptr<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -170,7 +100,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
ContainerType<UInt8>,
|
||||
@ -206,18 +136,21 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
|
||||
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
|
@ -25,12 +25,10 @@ namespace ErrorCodes
|
||||
/* BlockInputStream implementation for external dictionaries
|
||||
* read() returns blocks consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Key>
|
||||
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
||||
{
|
||||
public:
|
||||
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
||||
|
||||
DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
|
||||
|
||||
@ -60,111 +58,9 @@ protected:
|
||||
Block getBlock(size_t start, size_t size) const override;
|
||||
|
||||
private:
|
||||
// pointer types to getXXX functions
|
||||
// for single key dictionaries
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter
|
||||
= void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type>
|
||||
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
|
||||
|
||||
template <typename Type>
|
||||
using DecimalGetterByKey
|
||||
= void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
|
||||
|
||||
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
// call getXXX
|
||||
// for single key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block
|
||||
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
template <typename Getter>
|
||||
ColumnPtr getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
|
||||
|
||||
void fillKeyColumns(
|
||||
@ -174,65 +70,54 @@ private:
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
ColumnsWithTypeAndName & columns) const;
|
||||
|
||||
DictionaryPtr dictionary;
|
||||
std::shared_ptr<const IDictionaryBase> dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
ColumnsWithTypeAndName key_columns;
|
||||
Poco::Logger * logger;
|
||||
|
||||
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
FillBlockFunction fill_block_function;
|
||||
|
||||
Columns data_columns;
|
||||
GetColumnsFunction get_key_columns_function;
|
||||
GetColumnsFunction get_view_columns_function;
|
||||
|
||||
enum class DictionaryKeyType
|
||||
enum class DictionaryInputStreamKeyType
|
||||
{
|
||||
Id,
|
||||
ComplexKey,
|
||||
Callback
|
||||
};
|
||||
|
||||
DictionaryKeyType key_type;
|
||||
DictionaryInputStreamKeyType key_type;
|
||||
};
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, ids(std::move(ids_))
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(
|
||||
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
|
||||
, key_type(DictionaryKeyType::Id)
|
||||
, key_type(DictionaryInputStreamKeyType::Id)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const std::vector<StringRef> & keys,
|
||||
const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, key_type(DictionaryKeyType::ComplexKey)
|
||||
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
||||
{
|
||||
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
|
||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const Columns & data_columns_,
|
||||
@ -240,24 +125,23 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
GetColumnsFunction && get_key_columns_function_,
|
||||
GetColumnsFunction && get_view_columns_function_)
|
||||
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, data_columns(data_columns_)
|
||||
, get_key_columns_function(get_key_columns_function_)
|
||||
, get_view_columns_function(get_view_columns_function_)
|
||||
, key_type(DictionaryKeyType::Callback)
|
||||
, get_key_columns_function(std::move(get_key_columns_function_))
|
||||
, get_view_columns_function(std::move(get_view_columns_function_))
|
||||
, key_type(DictionaryInputStreamKeyType::Callback)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
/// TODO: Rewrite
|
||||
switch (key_type)
|
||||
{
|
||||
case DictionaryKeyType::ComplexKey:
|
||||
case DictionaryInputStreamKeyType::ComplexKey:
|
||||
{
|
||||
Columns columns;
|
||||
ColumnsWithTypeAndName view_columns;
|
||||
@ -268,16 +152,16 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.emplace_back(column);
|
||||
view_columns.emplace_back(column, key_column.type, key_column.name);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
|
||||
return fillBlock({}, columns, {}, std::move(view_columns));
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Id:
|
||||
case DictionaryInputStreamKeyType::Id:
|
||||
{
|
||||
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
|
||||
return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
|
||||
return fillBlock(ids_to_fill, {}, {}, {});
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Callback:
|
||||
case DictionaryInputStreamKeyType::Callback:
|
||||
{
|
||||
Columns columns;
|
||||
columns.reserve(data_columns.size());
|
||||
@ -294,102 +178,15 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.push_back(key_column.column);
|
||||
types.push_back(key_column.type);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
|
||||
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::fillBlock(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
|
||||
{
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
@ -408,9 +205,14 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
block_columns.push_back(column);
|
||||
|
||||
const DictionaryStructure & structure = dictionary->getStructure();
|
||||
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
|
||||
|
||||
if (structure.id && names.find(structure.id->name) != names.end())
|
||||
block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
{
|
||||
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
}
|
||||
|
||||
auto dictionary_key_type = dictionary->getKeyType();
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
@ -418,126 +220,35 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
|
||||
if (dictionary_key_type == DictionaryKeyType::simple)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
|
||||
&DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
|
||||
&DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
|
||||
&DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
column = getColumnFromStringAttribute<StringGetter>(
|
||||
&DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column},
|
||||
{std::make_shared<DataTypeUInt64>()},
|
||||
nullptr /* default_values_column */);
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
else
|
||||
{
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
keys,
|
||||
data_types,
|
||||
nullptr /* default_values_column*/);
|
||||
}
|
||||
|
||||
block_columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
||||
return Block(block_columns);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column_vector = ColumnVector<AttributeType>::create(size);
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
auto ptr = column_string.get();
|
||||
callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
|
||||
return column_string;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
template <typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
{
|
||||
auto column_vector = ColumnVector<UInt64>::create();
|
||||
column_vector->getData().reserve(ids_to_fill.size());
|
||||
@ -547,8 +258,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
|
||||
template <typename Key>
|
||||
void DictionaryBlockInputStream<Key>::fillKeyColumns(
|
||||
const std::vector<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t size,
|
||||
|
149
src/Dictionaries/DictionaryHelpers.h
Normal file
149
src/Dictionaries/DictionaryHelpers.h
Normal file
@ -0,0 +1,149 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryStructure.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
/**
|
||||
* In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
using DictionaryValueType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
|
||||
|
||||
/**
|
||||
* Used to create column with right type for DictionaryAttributeType.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryAttributeColumnProvider
|
||||
{
|
||||
public:
|
||||
using ColumnType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
|
||||
std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
|
||||
ColumnVector<DictionaryAttributeType>>>;
|
||||
|
||||
using ColumnPtr = typename ColumnType::MutablePtr;
|
||||
|
||||
static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
|
||||
{
|
||||
if constexpr (std::is_same_v<DictionaryAttributeType, String>)
|
||||
{
|
||||
return ColumnType::create();
|
||||
}
|
||||
if constexpr (IsDecimalNumber<DictionaryAttributeType>)
|
||||
{
|
||||
auto scale = getDecimalScale(*dictionary_attribute.nested_type);
|
||||
return ColumnType::create(size, scale);
|
||||
}
|
||||
else if constexpr (IsNumber<DictionaryAttributeType>)
|
||||
return ColumnType::create(size);
|
||||
else
|
||||
throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
|
||||
* Provides interface for getting default value with operator[];
|
||||
*
|
||||
* If default_values_column is null then attribute_default_value will be used.
|
||||
* If default_values_column is not null in constructor than this column values will be used as default values.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryDefaultValueExtractor
|
||||
{
|
||||
using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;
|
||||
|
||||
public:
|
||||
using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;
|
||||
|
||||
DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
|
||||
: default_value(std::move(attribute_default_value))
|
||||
{
|
||||
if (default_values_column_ == nullptr)
|
||||
use_default_value_from_column = false;
|
||||
else
|
||||
{
|
||||
if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
|
||||
{
|
||||
default_values_column = default_col;
|
||||
use_default_value_from_column = true;
|
||||
}
|
||||
else if (const auto * const default_col_const = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
|
||||
{
|
||||
default_value = default_col_const->template getValue<DictionaryAttributeType>();
|
||||
use_default_value_from_column = false;
|
||||
}
|
||||
else
|
||||
throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
DefaultValueType operator[](size_t row)
|
||||
{
|
||||
if (!use_default_value_from_column)
|
||||
return static_cast<DefaultValueType>(default_value);
|
||||
|
||||
assert(default_values_column != nullptr);
|
||||
|
||||
if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
|
||||
return default_values_column->getDataAt(row);
|
||||
else
|
||||
return default_values_column->getData()[row];
|
||||
}
|
||||
private:
|
||||
DictionaryAttributeType default_value;
|
||||
const DefaultColumnType * default_values_column = nullptr;
|
||||
bool use_default_value_from_column = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns ColumnVector data as PaddedPodArray.
|
||||
|
||||
* If column is constant parameter backup_storage is used to store values.
|
||||
*/
|
||||
template <typename T>
|
||||
static const PaddedPODArray<T> & getColumnVectorData(
|
||||
const IDictionaryBase * dictionary,
|
||||
const ColumnPtr column,
|
||||
PaddedPODArray<T> & backup_storage)
|
||||
{
|
||||
bool is_const_column = isColumnConst(*column);
|
||||
auto full_column = column->convertToFullColumnIfConst();
|
||||
auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
|
||||
|
||||
if (!vector_col)
|
||||
{
|
||||
throw Exception{ErrorCodes::TYPE_MISMATCH,
|
||||
"{}: type mismatch: column has wrong type expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
TypeName<T>::get()};
|
||||
}
|
||||
|
||||
if (is_const_column)
|
||||
{
|
||||
// With type conversion and const columns we need to use backup storage here
|
||||
auto & data = vector_col->getData();
|
||||
backup_storage.assign(data);
|
||||
|
||||
return backup_storage;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vector_col->getData();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -12,7 +14,6 @@
|
||||
#include <unordered_set>
|
||||
#include <ext/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -41,54 +42,46 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
/// Maps the data type of a dictionary attribute to the underlying type used to store it.
/// Date, DateTime and DateTime64 map to UInt16, UInt32 and UInt64; UUID maps to UInt128.
/// Throws Exception(UNKNOWN_TYPE) for any type that has no mapping.
AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
{
    auto type_index = type->getTypeId();

    switch (type_index)
    {
        case TypeIndex::UInt8:          return AttributeUnderlyingType::utUInt8;
        case TypeIndex::UInt16:         return AttributeUnderlyingType::utUInt16;
        case TypeIndex::UInt32:         return AttributeUnderlyingType::utUInt32;
        case TypeIndex::UInt64:         return AttributeUnderlyingType::utUInt64;
        case TypeIndex::UInt128:        return AttributeUnderlyingType::utUInt128;

        case TypeIndex::Int8:           return AttributeUnderlyingType::utInt8;
        case TypeIndex::Int16:          return AttributeUnderlyingType::utInt16;
        case TypeIndex::Int32:          return AttributeUnderlyingType::utInt32;
        case TypeIndex::Int64:          return AttributeUnderlyingType::utInt64;

        case TypeIndex::Float32:        return AttributeUnderlyingType::utFloat32;
        case TypeIndex::Float64:        return AttributeUnderlyingType::utFloat64;

        case TypeIndex::Decimal32:      return AttributeUnderlyingType::utDecimal32;
        case TypeIndex::Decimal64:      return AttributeUnderlyingType::utDecimal64;
        case TypeIndex::Decimal128:     return AttributeUnderlyingType::utDecimal128;

        case TypeIndex::Date:           return AttributeUnderlyingType::utUInt16;
        case TypeIndex::DateTime:       return AttributeUnderlyingType::utUInt32;
        case TypeIndex::DateTime64:     return AttributeUnderlyingType::utUInt64;

        case TypeIndex::UUID:           return AttributeUnderlyingType::utUInt128;

        case TypeIndex::String:         return AttributeUnderlyingType::utString;

        // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
        // TODO: This should be fixed by fully supporting arrays in dictionaries.
        case TypeIndex::Array:          return AttributeUnderlyingType::utString;

        default: break;
    }

    /// The trailing space keeps the type name separated from the word "dictionary" in the message.
    throw Exception{"Unknown type for dictionary " + type->getName(), ErrorCodes::UNKNOWN_TYPE};
}
|
||||
|
||||
|
||||
@ -215,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
|
||||
|
||||
for (const auto i : ext::range(0, key_types.size()))
|
||||
{
|
||||
const auto & expected_type = (*key)[i].type->getName();
|
||||
const auto & actual_type = key_types[i]->getName();
|
||||
const auto & expected_type = (*key)[i].type;
|
||||
const auto & actual_type = key_types[i];
|
||||
|
||||
if (expected_type != actual_type)
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
|
||||
+ actual_type,
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
if (!areTypesEqual(expected_type, actual_type))
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
|
||||
+ actual_type->getName(),
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first attribute whose name equals attribute_name, after checking that its type equals `type`.
/// Throws Exception(BAD_ARGUMENTS) if there is no such attribute,
/// and Exception(TYPE_MISMATCH) if the attribute exists but has a different type.
const DictionaryAttribute & DictionaryStructure::getAttribute(const String & attribute_name, const DataTypePtr & type) const
{
    for (const auto & candidate : attributes)
    {
        if (candidate.name != attribute_name)
            continue;

        /// Only the first attribute with a matching name is considered.
        if (!areTypesEqual(candidate.type, type))
            throw Exception{"Attribute type does not match, expected " + candidate.type->getName() + ", found " + type->getName(),
                ErrorCodes::TYPE_MISMATCH};

        return candidate;
    }

    throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}
|
||||
|
||||
std::string DictionaryStructure::getKeyDescription() const
|
||||
{
|
||||
@ -318,9 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
|
||||
continue;
|
||||
|
||||
|
||||
const auto type_string = config.getString(prefix + "type");
|
||||
const auto type = DataTypeFactory::instance().get(type_string);
|
||||
const auto underlying_type = getAttributeUnderlyingType(type_string);
|
||||
const auto initial_type = DataTypeFactory::instance().get(type_string);
|
||||
auto type = initial_type;
|
||||
bool is_array = false;
|
||||
bool is_nullable = false;
|
||||
|
||||
if (type->isNullable())
|
||||
{
|
||||
is_nullable = true;
|
||||
type = removeNullable(type);
|
||||
}
|
||||
|
||||
const auto underlying_type = getAttributeUnderlyingType(type);
|
||||
|
||||
const auto expression = config.getString(prefix + "expression", "");
|
||||
if (!expression.empty())
|
||||
@ -333,7 +353,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
try
|
||||
{
|
||||
if (null_value_string.empty())
|
||||
{
|
||||
null_value = type->getDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadBufferFromString null_value_buffer{null_value_string};
|
||||
@ -344,7 +366,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("error parsing null_value");
|
||||
String dictionary_name = config.getString(".dictionary.name", "");
|
||||
e.addMessage("While parsing null_value for attribute with name " + name
|
||||
+ " in dictionary " + dictionary_name);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -363,8 +387,18 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
|
||||
has_hierarchy = has_hierarchy || hierarchical;
|
||||
|
||||
res_attributes.emplace_back(
|
||||
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
|
||||
res_attributes.emplace_back(DictionaryAttribute{
|
||||
name,
|
||||
underlying_type,
|
||||
initial_type,
|
||||
type,
|
||||
expression,
|
||||
null_value,
|
||||
hierarchical,
|
||||
injective,
|
||||
is_object_id,
|
||||
is_nullable,
|
||||
is_array});
|
||||
}
|
||||
|
||||
return res_attributes;
|
||||
|
@ -42,7 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
|
||||
/// Min and max lifetimes for a dictionary or its entry
|
||||
using DictionaryLifetime = ExternalLoadableLifetime;
|
||||
|
||||
|
||||
/** Holds the description of a single dictionary attribute:
|
||||
* - name, used for lookup into dictionary and source;
|
||||
* - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;
|
||||
@ -57,13 +56,74 @@ struct DictionaryAttribute final
|
||||
const std::string name;
|
||||
const AttributeUnderlyingType underlying_type;
|
||||
const DataTypePtr type;
|
||||
const DataTypePtr nested_type;
|
||||
const std::string expression;
|
||||
const Field null_value;
|
||||
const bool hierarchical;
|
||||
const bool injective;
|
||||
const bool is_object_id;
|
||||
const bool is_nullable;
|
||||
const bool is_array;
|
||||
};
|
||||
|
||||
/** Empty tag type that carries a C++ value type at compile time.
  * The carried type is exposed as the nested alias AttributeType, so generic
  * code that receives a tag object can recover it from the tag's type.
  */
template <typename Type>
struct DictionaryAttributeType
{
    using AttributeType = Type;
};
|
||||
|
||||
template <typename F>
|
||||
void callOnDictionaryAttributeType(AttributeUnderlyingType type, F&& func)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
func(DictionaryAttributeType<UInt8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
func(DictionaryAttributeType<UInt16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
func(DictionaryAttributeType<UInt32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
func(DictionaryAttributeType<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
func(DictionaryAttributeType<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
func(DictionaryAttributeType<Int8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
func(DictionaryAttributeType<Int16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
func(DictionaryAttributeType<Int32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
func(DictionaryAttributeType<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
func(DictionaryAttributeType<Float32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
func(DictionaryAttributeType<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
func(DictionaryAttributeType<String>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
func(DictionaryAttributeType<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
func(DictionaryAttributeType<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
func(DictionaryAttributeType<Decimal128>());
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
struct DictionarySpecialAttribute final
|
||||
{
|
||||
@ -94,10 +154,10 @@ struct DictionaryStructure final
|
||||
DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
|
||||
void validateKeyTypes(const DataTypes & key_types) const;
|
||||
const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
|
||||
std::string getKeyDescription() const;
|
||||
bool isKeySizeFixed() const;
|
||||
size_t getKeySize() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
std::vector<DictionaryAttribute> getAttributes(
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user