Merge branch 'master' into aku/laglead

This commit is contained in:
Alexander Kuzmenkov 2021-03-22 21:16:27 +03:00 committed by GitHub
commit b0284f20c3
241 changed files with 3496 additions and 1920 deletions

View File

@ -16,6 +16,10 @@ if (ENABLE_CLANG_TIDY)
set (USE_CLANG_TIDY ON)
# clang-tidy requires assertions to guide the analysis
# Note that NDEBUG is set implicitly by CMake for non-debug builds
set(COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG")
# The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code.
# set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
elseif (FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)

contrib/libpq vendored

@ -1 +1 @@
Subproject commit 1f9c286dba60809edb64e384d6727d80d269b6cf
Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde

View File

@ -760,7 +760,7 @@ create view test_times_view as
total_client_time,
queries,
query_max,
real / queries avg_real_per_query,
real / if(queries > 0, queries, 1) avg_real_per_query,
query_min,
runs
from test_time
@ -781,7 +781,7 @@ create view test_times_view_total as
sum(total_client_time),
sum(queries),
max(query_max),
sum(real) / sum(queries) avg_real_per_query,
sum(real) / if(sum(queries) > 0, sum(queries), 1) avg_real_per_query,
min(query_min),
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
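The `if(queries > 0, queries, 1)` guard above keeps the average well defined when a test ends up with zero queries; a minimal sketch of the pattern with made-up numbers:
``` sql
-- When `queries` is 0, divide by 1 instead, so the average stays a normal
-- finite number rather than turning into inf/nan in the report.
SELECT real / if(queries > 0, queries, 1) AS avg_real_per_query
FROM (SELECT 3.5 AS real, 0 AS queries);
```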

View File

@ -263,8 +263,16 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
try:
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
@ -311,9 +319,8 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id,
settings = {'max_execution_time': args.max_query_seconds})
except Exception as e:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
@ -390,7 +397,7 @@ for query_index in queries_to_run:
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
except Exception as e:
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message

View File

@ -16,6 +16,14 @@ while true; do
done
set -e
echo "Configure to use Yandex dockerhub-proxy"
cat > /etc/docker/daemon.json << EOF
{
"insecure-registries": ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
"registry-mirrors": ["dockerhub-proxy.sas.yp-c.yandex.net:5000"]
}
EOF
echo "Start tests"
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse
export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse

View File

@ -8,6 +8,10 @@ toc_title: Support
!!! info "Info"
If you have launched a ClickHouse commercial support service, feel free to [open a pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) adding it to the following list.
## Yandex.Cloud
Worldwide ClickHouse support from the authors of ClickHouse. Supports both on-premise and cloud deployments. For details, contact clickhouse-support@yandex-team.com
## Altinity {#altinity}
Altinity has offered enterprise ClickHouse support and services since 2017. Altinity customers range from Fortune 100 enterprises to startups. Visit [www.altinity.com](https://www.altinity.com/) for more information.

View File

@ -6,7 +6,7 @@ toc_title: Atomic
# Atomic {#atomic}
It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default.
It supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. `Atomic` database engine is used by default.
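A short sketch of the non-blocking swap mentioned above; the table names are placeholders and both tables must already exist:
``` sql
-- Atomically swaps the two tables within an Atomic database.
EXCHANGE TABLES t1 AND t2;
```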
## Creating a Database {#creating-a-database}
@ -14,4 +14,4 @@ It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHAN
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/atomic/) <!--hide-->

View File

@ -7,15 +7,17 @@ toc_title: Recipes Dataset
The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
## Download and unpack the dataset
## Download and Unpack the Dataset
Accept Terms and Conditions and download it [here](https://recipenlg.cs.put.poznan.pl/dataset). Unpack the zip file with `unzip`. You will get the `full_dataset.csv` file.
1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
1. Accept the Terms and Conditions and download the zip file.
1. Unpack the zip file with `unzip`. You will get the `full_dataset.csv` file.
## Create a table
## Create a Table
Run clickhouse-client and execute the following CREATE query:
```
``` sql
CREATE TABLE recipes
(
title String,
@ -27,11 +29,11 @@ CREATE TABLE recipes
) ENGINE = MergeTree ORDER BY title;
```
## Insert the data
## Insert the Data
Run the following command:
```
``` bash
clickhouse-client --query "
INSERT INTO recipes
SELECT
@ -49,32 +51,41 @@ clickhouse-client --query "
This is a showcase of how to parse a custom CSV file, as it requires several settings to be tuned.
Explanation:
- the dataset is in CSV format, but it requires some preprocessing on insertion; we use table function [input](../../sql-reference/table-functions/input/) to perform preprocessing;
- the structure of CSV file is specified in the argument of the table function `input`;
- the field `num` (row number) is unneeded - we parse it from file and ignore;
- we use `FORMAT CSVWithNames` but the header in CSV will be ignored (by command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field;
- file is using only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and single quote must not be parsed as the string enclosing - that's why we also add the `--format_csv_allow_single_quote 0` parameter;
- some strings from CSV cannot parse, because they contain `\M/` sequence at the beginning of the value; the only value starting with backslash in CSV can be `\N` that is parsed as SQL NULL. We add `--input_format_allow_errors_num 10` parameter and up to ten malformed records can be skipped;
- there are arrays for ingredients, directions and NER fields; these arrays are represented in unusual form: they are serialized into string as JSON and then placed in CSV - we parse them as String and then use [JSONExtract](../../sql-reference/functions/json-functions/) function to transform it to Array.
- The dataset is in CSV format, but it requires some preprocessing on insertion; we use the table function [input](../../sql-reference/table-functions/input.md) to perform the preprocessing;
- The structure of the CSV file is specified in the argument of the table function `input`;
- The field `num` (row number) is not needed; we parse it from the file and ignore it;
- We use `FORMAT CSVWithNames`, but the header in the CSV will be ignored (via the command-line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name of the first field;
- The file uses only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and a single quote must not be treated as an enclosing character, which is why we also add the `--format_csv_allow_single_quote 0` parameter;
- Some strings from the CSV cannot be parsed because they contain the `\M/` sequence at the beginning of the value; the only value that can start with a backslash in CSV is `\N`, which is parsed as SQL NULL. We add the `--input_format_allow_errors_num 10` parameter, so up to ten malformed records can be skipped;
- There are arrays for the ingredients, directions and NER fields; these arrays are represented in an unusual form: they are serialized into a string as JSON and then placed in the CSV. We parse them as String and then use the [JSONExtract](../../sql-reference/functions/json-functions/) function to transform them into an Array (see the short sketch after this list).
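The `JSONExtract` step from the last bullet can be tried in isolation; a minimal sketch (the string literal is a made-up example, not a row from the dataset):
``` sql
-- A JSON array serialized into a string becomes a proper Array(String).
SELECT JSONExtract('["salt", "sugar", "flour"]', 'Array(String)') AS parsed;
```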
## Validate the inserted data
## Validate the Inserted Data
By checking the row count:
```
SELECT count() FROM recipes
Query:
``` sql
SELECT count() FROM recipes;
```
Result:
``` text
┌─count()─┐
│ 2231141 │
└─────────┘
```
## Example Queries
## Example queries
### Top Components by the Number of Recipes:
### Top components by the number of recipes:
In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows.
```
Query:
``` sql
SELECT
arrayJoin(NER) AS k,
count() AS c
@ -82,7 +93,11 @@ FROM recipes
GROUP BY k
ORDER BY c DESC
LIMIT 50
```
Result:
``` text
┌─k────────────────────┬──────c─┐
│ salt │ 890741 │
│ sugar │ 620027 │
@ -139,11 +154,9 @@ LIMIT 50
50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.)
```
In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to multiply data by array elements.
### The Most Complex Recipes with Strawberry
### The most complex recipes with strawberry
```
``` sql
SELECT
title,
length(NER),
@ -152,7 +165,11 @@ FROM recipes
WHERE has(NER, 'strawberry')
ORDER BY length(directions) DESC
LIMIT 10
```
Result:
``` text
┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┐
│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │
│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │
@ -171,15 +188,19 @@ LIMIT 10
In this example, we use the [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions.
There is a wedding cake that requires the whole 126 steps to produce!
There is a wedding cake that requires the whole 126 steps to produce! Let's look at those directions:
Query:
```
``` sql
SELECT arrayJoin(directions)
FROM recipes
WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'
```
Result:
``` text
┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. │
│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │
@ -312,6 +333,8 @@ WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'
126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.)
```
### Online playground
### Online Playground
The dataset is also available in the [Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==).
The dataset is also available in the [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==).
[Original article](https://clickhouse.tech/docs/en/getting-started/example-datasets/recipes/) <!--hide-->

View File

@ -1097,14 +1097,25 @@ See the section “WITH TOTALS modifier”.
## max_parallel_replicas {#settings-max_parallel_replicas}
The maximum number of replicas for each shard when executing a query. In limited circumstances, this can make a query faster by executing it on more servers. This setting is only useful for replicated tables with a sampling key. There are cases where performance will not improve or even worsen:
The maximum number of replicas for each shard when executing a query.
- the position of the sampling key in the partitioning key's order doesn't allow efficient range scans
- adding a sampling key to the table makes filtering by other columns less efficient
- the sampling key is an expression that is expensive to calculate
- the cluster's latency distribution has a long tail, so that querying more servers increases the query's overall latency
Possible values:
In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
- Positive integer.
Default value: `1`.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
- The position of the sampling key in the partitioning key doesn't allow efficient range scans.
- Adding a sampling key to the table makes filtering by other columns less efficient.
- The sampling key is an expression that is expensive to calculate.
- The cluster latency distribution has a long tail, so querying more servers increases the overall query latency.
!!! warning "Warning"
This setting will produce incorrect results when joins or subqueries are involved and not all tables meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
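A minimal sketch of applying the setting per query; `hits_replicated` is a hypothetical replicated table created with a `SAMPLE BY` clause:
``` sql
-- With a sampling key in place, up to 2 replicas of every shard
-- process disjoint parts of the data for this query.
SELECT count()
FROM hits_replicated
SETTINGS max_parallel_replicas = 2;
```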
## compile {#compile}

View File

@ -44,9 +44,15 @@ Columns:
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash value that is identical for similar queries differing only in the values of literals.
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- 1 — Query was initiated by the client.
@ -73,69 +79,98 @@ Columns:
- 0 — The query was launched from the TCP interface.
- 1 — `GET` method was used.
- 2 — `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent` passed in the HTTP request.
- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` passed in the HTTP request (contains an absolute or partial address of the page making the query).
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to an arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions` that were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate function combinators` that were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines` that were used during query execution.
- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families` that were used during query execution.
- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries` that were used during query execution.
- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats` that were used during query execution.
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions` that were used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages` that were used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions` that were used during query execution.
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 \G
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 33452
port: 37486
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 37486
interface: 1
os_user: bharatnc
client_hostname: tower
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']
Settings.Values: ['0','random','1','10000000000','1']
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
```
**See Also**
@ -143,4 +178,3 @@ Settings.Values: ['0','random','1','10000000000','1']
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) <!--hide-->

View File

@ -853,7 +853,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | &nbsp; 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |

View File

@ -907,66 +907,64 @@ WHERE diff != 1
## runningDifferenceStartingWithFirstValue {#runningdifferencestartingwithfirstvalue}
Same as for [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row.
Same as [runningDifference](./other-functions.md#other_functions-runningdifference), except that the first row returns the value of the first row itself, and each subsequent row returns the difference from the previous row.
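A minimal sketch of the behavior, using a generated sequence of squares (values are illustrative):
``` sql
-- For the squares 0, 1, 4, 9, 16 the first row returns the first value itself (0),
-- and the following rows return the differences: 1, 3, 5, 7.
SELECT
    number * number AS x,
    runningDifferenceStartingWithFirstValue(number * number) AS diff
FROM numbers(5);
```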
## runningConcurrency {#runningconcurrency}
Given a series of beginning time and ending time of events, this function calculates concurrency of the events at each of the data point, that is, the beginning time.
Calculates the number of concurrent events.
Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type.
The function calculates the total number of active (concurrent) events for each event start time.
!!! warning "Warning"
Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block.
The result of the function depends on the order of data in the block. It assumes the beginning time is sorted in ascending order.
Events must be ordered by start time in ascending order. If this requirement is violated, the function raises an exception.
Every data block is processed separately. If events from different data blocks overlap, they cannot be processed correctly.
**Syntax**
``` sql
runningConcurrency(begin, end)
runningConcurrency(start, end)
```
**Arguments**
- `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
Note that two columns `begin` and `end` must have the same type.
- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned values**
- The concurrency of events at the data point.
- The number of concurrent events at each event start time.
Type: [UInt32](../../sql-reference/data-types/int-uint.md)
**Example**
Input table:
Consider the table:
``` text
┌───────────────begin─┬─────────────────end─┐
│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │
│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │
│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │
│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │
│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │
└─────────────────────┴─────────────────────┘
┌──────start─┬────────end─┐
│ 2021-03-03 │ 2021-03-11 │
│ 2021-03-06 │ 2021-03-12 │
│ 2021-03-07 │ 2021-03-08 │
│ 2021-03-11 │ 2021-03-12 │
└────────────┴────────────┘
```
Query:
``` sql
SELECT runningConcurrency(begin, end) FROM example
SELECT start, runningConcurrency(start, end) FROM example_table;
```
Result:
``` text
┌─runningConcurrency(begin, end)─┐
│ 1 │
│ 2 │
│ 3 │
│ 2 │
│ 1 │
└────────────────────────────────┘
┌──────start─┬─runningConcurrency(start, end)─┐
│ 2021-03-03 │ 1 │
│ 2021-03-06 │ 2 │
│ 2021-03-07 │ 3 │
│ 2021-03-11 │ 2 │
└────────────┴────────────────────────────────┘
```
## MACNumToString(num) {#macnumtostringnum}

View File

@ -47,6 +47,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
Creates a table with the same result as that of the [table function](../../../sql-reference/table-functions/index.md#table-functions) specified. The created table will also work in the same way as the corresponding table function that was specified.
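A short sketch of the table-function form described above; the table name is arbitrary and `numbers` is used here just as a convenient built-in table function:
``` sql
-- The created table behaves like the table function it was created from.
CREATE TABLE ten_numbers AS numbers(10);
SELECT count() FROM ten_numbers;
```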
### From SELECT query {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
```

View File

@ -0,0 +1,17 @@
---
toc_priority: 32
toc_title: Atomic
---
# Atomic {#atomic}
Supports non-blocking `DROP` and `RENAME TABLE` queries and `EXCHANGE TABLES t1 AND t2` queries. The `Atomic` database engine is used by default.
## Creating a Database {#creating-a-database}
```sql
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/ru/engines/database-engines/atomic/) <!--hide-->

View File

@ -8,7 +8,7 @@ toc_title: "Введение"
Database engines allow you to work with tables.
By default, ClickHouse uses its own database engine, which provides configurable [table engines](../../engines/database-engines/index.md) and an [SQL dialect](../../engines/database-engines/index.md).
By default, ClickHouse uses the [Atomic](../../engines/database-engines/atomic.md) database engine. It provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md).
You can also use the following database engines:

View File

@ -0,0 +1,342 @@
---
toc_priority: 16
toc_title: Recipes Dataset
---
# Recipes Dataset
The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes, and its size is slightly less than 1 GB.
## Download and Unpack the Dataset
1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
1. Accept the Terms and Conditions and download the zip archive with the dataset.
1. Unpack the zip archive to get the `full_dataset.csv` file.
## Create a Table
Start the ClickHouse client and run the following query to create the `recipes` table:
``` sql
CREATE TABLE recipes
(
title String,
ingredients Array(String),
directions Array(String),
link String,
source LowCardinality(String),
NER Array(String)
) ENGINE = MergeTree ORDER BY title;
```
## Insert the Data
To insert the data from `full_dataset.csv` into the `recipes` table, run the following command:
``` bash
clickhouse-client --query "
INSERT INTO recipes
SELECT
title,
JSONExtract(ingredients, 'Array(String)'),
JSONExtract(directions, 'Array(String)'),
link,
source,
JSONExtract(NER, 'Array(String)')
FROM input('num UInt32, title String, ingredients String, directions String, link String, source LowCardinality(String), NER String')
FORMAT CSVWithNames
" --input_format_with_names_use_header 0 --format_csv_allow_single_quote 0 --input_format_allow_errors_num 10 < full_dataset.csv
```
This is an example of parsing a custom CSV file with several format settings applied.
Explanation:
- The dataset is in CSV format, but it requires some preprocessing on insertion; the [input](../../sql-reference/table-functions/input.md) table function is used for the preprocessing;
- The structure of the CSV file is specified in the argument of the `input` table function;
- The `num` field (row number) is not needed; it is read from the file but ignored;
- `FORMAT CSVWithNames` is used, but the header in the CSV is ignored (via the command-line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name of the first field;
- The CSV file uses only double quotes to enclose strings; some strings are not enclosed in double quotes, and a single quote must not be treated as an enclosing character, so the `--format_csv_allow_single_quote 0` parameter is used;
- Some strings from the CSV cannot be read correctly because they start with the `\M/` sequence, while in CSV the only value that may start with a backslash is `\N`, which is parsed as SQL NULL. The `--input_format_allow_errors_num 10` parameter allows up to ten malformed records to be skipped;
- The `ingredients`, `directions`, and `NER` arrays are represented in an unusual form: they are serialized into a JSON string and then placed in the CSV, so they can be read and processed as ordinary `String` values. The [JSONExtract](../../sql-reference/functions/json-functions.md) function is then used to convert the string into an array.
## Validate the Inserted Data
To validate the inserted data, count the number of rows in the table:
Query:
``` sql
SELECT count() FROM recipes;
```
Result:
``` text
┌─count()─┐
│ 2231141 │
└─────────┘
```
## Example Queries
### The Most Mentioned Ingredients in the Recipes:
In this example, you will learn how to expand an array into a set of rows using the [arrayJoin](../../sql-reference/functions/array-join.md) function.
Query:
``` sql
SELECT
arrayJoin(NER) AS k,
count() AS c
FROM recipes
GROUP BY k
ORDER BY c DESC
LIMIT 50
```
Result:
``` text
┌─k────────────────────┬──────c─┐
│ salt │ 890741 │
│ sugar │ 620027 │
│ butter │ 493823 │
│ flour │ 466110 │
│ eggs │ 401276 │
│ onion │ 372469 │
│ garlic │ 358364 │
│ milk │ 346769 │
│ water │ 326092 │
│ vanilla │ 270381 │
│ olive oil │ 197877 │
│ pepper │ 179305 │
│ brown sugar │ 174447 │
│ tomatoes │ 163933 │
│ egg │ 160507 │
│ baking powder │ 148277 │
│ lemon juice │ 146414 │
│ Salt │ 122557 │
│ cinnamon │ 117927 │
│ sour cream │ 116682 │
│ cream cheese │ 114423 │
│ margarine │ 112742 │
│ celery │ 112676 │
│ baking soda │ 110690 │
│ parsley │ 102151 │
│ chicken │ 101505 │
│ onions │ 98903 │
│ vegetable oil │ 91395 │
│ oil │ 85600 │
│ mayonnaise │ 84822 │
│ pecans │ 79741 │
│ nuts │ 78471 │
│ potatoes │ 75820 │
│ carrots │ 75458 │
│ pineapple │ 74345 │
│ soy sauce │ 70355 │
│ black pepper │ 69064 │
│ thyme │ 68429 │
│ mustard │ 65948 │
│ chicken broth │ 65112 │
│ bacon │ 64956 │
│ honey │ 64626 │
│ oregano │ 64077 │
│ ground beef │ 64068 │
│ unsalted butter │ 63848 │
│ mushrooms │ 61465 │
│ Worcestershire sauce │ 59328 │
│ cornstarch │ 58476 │
│ green pepper │ 58388 │
│ Cheddar cheese │ 58354 │
└──────────────────────┴────────┘
50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.)
```
### The Most Complex Recipes with Strawberry
Query:
``` sql
SELECT
title,
length(NER),
length(directions)
FROM recipes
WHERE has(NER, 'strawberry')
ORDER BY length(directions) DESC
LIMIT 10;
```
Result:
``` text
┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┐
│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │
│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │
│ Charlotte-Style Ice Cream │ 11 │ 45 │
│ Sinfully Good a Million Layers Chocolate Layer Cake, With Strawb │ 31 │ 45 │
│ Sweetened Berries With Elderflower Sherbet │ 24 │ 44 │
│ Chocolate-Strawberry Mousse Cake │ 15 │ 42 │
│ Rhubarb Charlotte with Strawberries and Rum │ 20 │ 42 │
│ Chef Joey's Strawberry Vanilla Tart │ 7 │ 37 │
│ Old-Fashioned Ice Cream Sundae Cake │ 17 │ 37 │
│ Watermelon Cake │ 16 │ 36 │
└──────────────────────────────────────────────────────────────────┴─────────────┴────────────────────┘
10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.)
```
This example uses the [has](../../sql-reference/functions/array-functions.md#hasarr-elem) function to check whether an array contains an element, and sorts by the number of steps (`length(directions)`).
There is a wedding cake that requires the whole 126 steps to produce! Let's look at those steps:
Query:
``` sql
SELECT arrayJoin(directions)
FROM recipes
WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake';
```
Result:
``` text
┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. │
│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │
│ Dust pans with flour; line bottoms with parchment. │
│ Combine 1/3 cup orange juice and 2 ounces unsweetened chocolate in heavy small saucepan. │
│ Stir mixture over medium-low heat until chocolate melts. │
│ Remove from heat. │
│ Gradually mix in 1 2/3 cups orange juice. │
│ Sift 3 cups flour, 2/3 cup cocoa, 2 teaspoons baking soda, 1 teaspoon salt and 1/2 teaspoon baking powder into medium bowl. │
│ using electric mixer, beat 1 cup (2 sticks) butter and 3 cups sugar in large bowl until blended (mixture will look grainy). │
│ Add 4 eggs, 1 at a time, beating to blend after each. │
│ Beat in 1 tablespoon orange peel and 1 tablespoon vanilla extract. │
│ Add dry ingredients alternately with orange juice mixture in 3 additions each, beating well after each addition. │
│ Mix in 1 cup chocolate chips. │
│ Transfer 1 cup plus 2 tablespoons batter to prepared 5-inch pan, 3 cups batter to prepared 8-inch pan and remaining batter (about 6 cups) to 12-inch pan. │
│ Place 5-inch and 8-inch pans on center rack of oven. │
│ Place 12-inch pan on lower rack of oven. │
│ Bake cakes until tester inserted into center comes out clean, about 35 minutes. │
│ Transfer cakes in pans to racks and cool completely. │
│ Mark 4-inch diameter circle on one 6-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Mark 7-inch-diameter circle on one 8-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Mark 11-inch-diameter circle on one 12-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Cut around sides of 5-inch-cake to loosen. │
│ Place 4-inch cardboard over pan. │
│ Hold cardboard and pan together; turn cake out onto cardboard. │
│ Peel off parchment.Wrap cakes on its cardboard in foil. │
│ Repeat turning out, peeling off parchment and wrapping cakes in foil, using 7-inch cardboard for 8-inch cake and 11-inch cardboard for 12-inch cake. │
│ Using remaining ingredients, make 1 more batch of cake batter and bake 3 more cake layers as described above. │
│ Cool cakes in pans. │
│ Cover cakes in pans tightly with foil. │
│ (Can be prepared ahead. │
│ Let stand at room temperature up to 1 day or double-wrap all cake layers and freeze up to 1 week. │
│ Bring cake layers to room temperature before using.) │
│ Place first 12-inch cake on its cardboard on work surface. │
│ Spread 2 3/4 cups ganache over top of cake and all the way to edge. │
│ Spread 2/3 cup jam over ganache, leaving 1/2-inch chocolate border at edge. │
│ Drop 1 3/4 cups white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub some cocoa powder over second 12-inch cardboard. │
│ Cut around sides of second 12-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Carefully slide cake off cardboard and onto filling on first 12-inch cake. │
│ Refrigerate. │
│ Place first 8-inch cake on its cardboard on work surface. │
│ Spread 1 cup ganache over top all the way to edge. │
│ Spread 1/4 cup jam over, leaving 1/2-inch chocolate border at edge. │
│ Drop 1 cup white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub some cocoa over second 8-inch cardboard. │
│ Cut around sides of second 8-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Slide cake off cardboard and onto filling on first 8-inch cake. │
│ Refrigerate. │
│ Place first 5-inch cake on its cardboard on work surface. │
│ Spread 1/2 cup ganache over top of cake and all the way to edge. │
│ Spread 2 tablespoons jam over, leaving 1/2-inch chocolate border at edge. │
│ Drop 1/3 cup white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub cocoa over second 6-inch cardboard. │
│ Cut around sides of second 5-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Slide cake off cardboard and onto filling on first 5-inch cake. │
│ Chill all cakes 1 hour to set filling. │
│ Place 12-inch tiered cake on its cardboard on revolving cake stand. │
│ Spread 2 2/3 cups frosting over top and sides of cake as a first coat. │
│ Refrigerate cake. │
│ Place 8-inch tiered cake on its cardboard on cake stand. │
│ Spread 1 1/4 cups frosting over top and sides of cake as a first coat. │
│ Refrigerate cake. │
│ Place 5-inch tiered cake on its cardboard on cake stand. │
│ Spread 3/4 cup frosting over top and sides of cake as a first coat. │
│ Refrigerate all cakes until first coats of frosting set, about 1 hour. │
│ (Cakes can be made to this point up to 1 day ahead; cover and keep refrigerate.) │
│ Prepare second batch of frosting, using remaining frosting ingredients and following directions for first batch. │
│ Spoon 2 cups frosting into pastry bag fitted with small star tip. │
│ Place 12-inch cake on its cardboard on large flat platter. │
│ Place platter on cake stand. │
│ Using icing spatula, spread 2 1/2 cups frosting over top and sides of cake; smooth top. │
│ Using filled pastry bag, pipe decorative border around top edge of cake. │
│ Refrigerate cake on platter. │
│ Place 8-inch cake on its cardboard on cake stand. │
│ Using icing spatula, spread 1 1/2 cups frosting over top and sides of cake; smooth top. │
│ Using pastry bag, pipe decorative border around top edge of cake. │
│ Refrigerate cake on its cardboard. │
│ Place 5-inch cake on its cardboard on cake stand. │
│ Using icing spatula, spread 3/4 cup frosting over top and sides of cake; smooth top. │
│ Using pastry bag, pipe decorative border around top edge of cake, spooning more frosting into bag if necessary. │
│ Refrigerate cake on its cardboard. │
│ Keep all cakes refrigerated until frosting sets, about 2 hours. │
│ (Can be prepared 2 days ahead. │
│ Cover loosely; keep refrigerated.) │
│ Place 12-inch cake on platter on work surface. │
│ Press 1 wooden dowel straight down into and completely through center of cake. │
│ Mark dowel 1/4 inch above top of frosting. │
│ Remove dowel and cut with serrated knife at marked point. │
│ Cut 4 more dowels to same length. │
│ Press 1 cut dowel back into center of cake. │
│ Press remaining 4 cut dowels into cake, positioning 3 1/2 inches inward from cake edges and spacing evenly. │
│ Place 8-inch cake on its cardboard on work surface. │
│ Press 1 dowel straight down into and completely through center of cake. │
│ Mark dowel 1/4 inch above top of frosting. │
│ Remove dowel and cut with serrated knife at marked point. │
│ Cut 3 more dowels to same length. │
│ Press 1 cut dowel back into center of cake. │
│ Press remaining 3 cut dowels into cake, positioning 2 1/2 inches inward from edges and spacing evenly. │
│ Using large metal spatula as aid, place 8-inch cake on its cardboard atop dowels in 12-inch cake, centering carefully. │
│ Gently place 5-inch cake on its cardboard atop dowels in 8-inch cake, centering carefully. │
│ Using citrus stripper, cut long strips of orange peel from oranges. │
│ Cut strips into long segments. │
│ To make orange peel coils, wrap peel segment around handle of wooden spoon; gently slide peel off handle so that peel keeps coiled shape. │
│ Garnish cake with orange peel coils, ivy or mint sprigs, and some berries. │
│ (Assembled cake can be made up to 8 hours ahead. │
│ Let stand at cool room temperature.) │
│ Remove top and middle cake tiers. │
│ Remove dowels from cakes. │
│ Cut top and middle cakes into slices. │
│ To cut 12-inch cake: Starting 3 inches inward from edge and inserting knife straight down, cut through from top to bottom to make 6-inch-diameter circle in center of cake. │
│ Cut outer portion of cake into slices; cut inner portion into slices and serve with strawberries. │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.)
```
### Online Playground
The dataset is also available in the [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==).
[Original article](https://clickhouse.tech/docs/ru/getting-started/example-datasets/recipes/) <!--hide-->

View File

@ -103,7 +103,11 @@ toc_title: "Визуальные интерфейсы от сторонних р
[xeus-clickhouse](https://github.com/wangfenjin/xeus-clickhouse) is a Jupyter kernel for ClickHouse that supports querying ClickHouse data from Jupyter using SQL.
## Commercial {#kommercheskie}
### MindsDB Studio {#mindsdb}
[MindsDB](https://mindsdb.com/) is an open-source product that implements an artificial intelligence (AI) layer for various DBMSs, including ClickHouse. MindsDB simplifies building, training, and deploying modern machine-learning models. The MindsDB Studio graphical user interface lets you train new models on data from the database, interpret the predictions the models make, identify potential data issues, and visualize and evaluate model reliability with the Explainable AI feature, so you can adapt and tune your machine-learning models faster.
## Commercial {#commercial}
### DataGrip {#datagrip}

View File

@ -69,6 +69,9 @@ toc_title: "Библиотеки для интеграции от сторонн
- Geo
- [MaxMind](https://dev.maxmind.com/geoip/)
- [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)
- AutoML
- [MindsDB](https://mindsdb.com/)
- [MindsDB](https://github.com/mindsdb/mindsdb) - A predictive analytics and artificial intelligence layer for the ClickHouse DBMS.
## Ecosystems Around Programming Languages {#ekosistemy-vokrug-iazykov-programmirovaniia}

View File

@ -1086,14 +1086,24 @@ load_balancing = round_robin
## max_parallel_replicas {#settings-max_parallel_replicas}
The maximum number of replicas for each shard when executing a distributed query. In some cases this can make a query faster by executing it on more servers. This setting is only useful for replicated tables created with a SAMPLING KEY expression. There are cases where performance will not improve or will even degrade:
The maximum number of replicas of each shard used when executing a query.
- The position of the sampling key in the partitioning key does not allow efficient range scans
- Adding a sampling key to the table makes filtering by other columns less efficient
- The expression used to calculate the sampling key is computationally expensive
- The distribution of network latencies within the cluster has a long tail, so querying more servers may increase the overall query latency
Possible values:
- Positive integer.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. However, query performance may degrade in the following cases:
- The position of the sampling key in the partitioning key does not allow efficient range scans.
- Adding a sampling key to the table makes filtering by other columns less efficient.
- The sampling key is an expression that is expensive to calculate.
- The cluster latency distribution has a long tail, so querying several servers in parallel increases the average query latency.
!!! warning "Warning"
Parallel query execution can produce incorrect results if the query contains joins or subqueries and not all tables meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for details.
In addition, this setting can produce incorrect results when joins or subqueries are used and not all tables meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
## compile {#compile}

View File

@ -27,7 +27,7 @@ toc_title: "Системные таблицы"
- `database` — the database the system table belongs to. This option is currently deprecated. All system tables are in the `system` database.
- `table` — the table to insert data into.
- `partition_by` — the [partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md).
- `ttl` — the table [time to live](../../sql-reference/statements/alter/ttl.md).
- `ttl` — the [time to live](../../sql-reference/statements/alter/ttl.md) of records in the table.
- `flush_interval_milliseconds` — the interval for flushing data to disk, in milliseconds.
- `engine` — the full engine name (starting with `ENGINE =`) with parameters. This option conflicts with `partition_by` and `ttl`. If they are specified together, the server returns an error and exits.

View File

@ -44,9 +44,15 @@ ClickHouse не удаляет данные из таблица автомати
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store the query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message, if the query ended with an exception.
- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash value that is identical for similar queries differing only in the values of literals.
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message, if the query ended with an exception.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query completed successfully.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- 1 — Query was initiated by the client.
@ -74,72 +80,101 @@ ClickHouse не удаляет данные из таблица автомати
- 1 — `GET`.
- 2 — `POST`.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent`.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` (contains a full or partial address of the page the query was made from).
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For`.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads participating in query execution.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. An arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if no comment is defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their descriptions can be found in the table [system.events](#system_tables-events).
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of the metrics listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging of settings changes, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of the settings listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions` that were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate function combinators` that were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines` that were used during query execution.
- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families` that were used during query execution.
- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionary sources` that were used during query execution.
- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats` that were used during query execution.
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions` that were used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table engines` that were used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions` that were used during query execution.
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 \G
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 33452
port: 37486
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 37486
interface: 1
os_user: bharatnc
client_hostname: tower
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']
Settings.Values: ['0','random','1','10000000000','1']
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
```
**Смотрите также**
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — в этой таблице содержится информация о цепочке каждого выполненного запроса.
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/query_log) <!--hide-->

View File

@ -866,7 +866,7 @@ formatDateTime(Time, Format\[, Timezone\])
| %C | номер года, поделённый на 100 (00-99) | 20 |
| %d | день месяца, с ведущим нулём (01-31) | 02 |
| %D | короткая запись %m/%d/%y | 01/02/18 |
| %e | день месяца, с ведущим пробелом ( 1-31) | 2 |
| %e | день месяца, с ведущим пробелом ( 1-31) | &nbsp; 2 |
| %F | короткая запись %Y-%m-%d | 2018-01-02 |
| %G | четырехзначный формат вывода ISO-года, который основывается на особом подсчете номера недели согласно [стандарту ISO 8601](https://ru.wikipedia.org/wiki/ISO_8601), обычно используется вместе с %V | 2018 |
| %g | двузначный формат вывода года по стандарту ISO 8601 | 18 |
@ -877,6 +877,7 @@ formatDateTime(Time, Format\[, Timezone\])
| %M | минуты, с ведущим нулём (00-59) | 33 |
| %n | символ переноса строки () | |
| %p | обозначения AM или PM | PM |
| %Q | квартал (1-4) | 1 |
| %R | короткая запись %H:%M | 22:33 |
| %S | секунды, с ведущими нулями (00-59) | 44 |
| %t | символ табуляции () | |

View File

@ -772,7 +772,7 @@ FROM numbers(16)
└────────────┴───────┴───────────┴────────────────┘
```
## runningDifference(x) {#runningdifferencex}
## runningDifference(x) {#other_functions-runningdifference}
Считает разницу между последовательными значениями строк в блоке данных.
Возвращает 0 для первой строки и разницу с предыдущей строкой для каждой последующей строки.
@ -849,7 +849,64 @@ WHERE diff != 1
## runningDifferenceStartingWithFirstValue {#runningdifferencestartingwithfirstvalue}
То же, что и \[runningDifference\] (./other_functions.md # other_functions-runningdifference), но в первой строке возвращается значение первой строки, а не ноль.
То же, что и [runningDifference](./other-functions.md#other_functions-runningdifference), но в первой строке возвращается значение первой строки, а не ноль.
## runningConcurrency {#runningconcurrency}
Подсчитывает количество одновременно идущих событий.
У каждого события есть время начала и время окончания. Считается, что время начала включено в событие, а время окончания исключено из него. Столбцы со временем начала и окончания событий должны иметь одинаковый тип данных.
Функция подсчитывает количество событий, происходящих одновременно на момент начала каждого из событий в выборке.
!!! warning "Предупреждение"
События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение.
Каждый блок данных обрабатывается независимо. Если события из разных блоков данных накладываются по времени, они не могут быть корректно обработаны.
**Синтаксис**
``` sql
runningConcurrency(start, end)
```
**Аргументы**
- `start` — Столбец с временем начала событий. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `end` — Столбец с временем окончания событий. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
**Возвращаемое значение**
- Количество одновременно идущих событий на момент начала каждого события.
Тип: [UInt32](../../sql-reference/data-types/int-uint.md)
**Пример**
Рассмотрим таблицу:
``` text
┌──────start─┬────────end─┐
│ 2021-03-03 │ 2021-03-11 │
│ 2021-03-06 │ 2021-03-12 │
│ 2021-03-07 │ 2021-03-08 │
│ 2021-03-11 │ 2021-03-12 │
└────────────┴────────────┘
```
Запрос:
``` sql
SELECT start, runningConcurrency(start, end) FROM example_table;
```
Результат:
``` text
┌──────start─┬─runningConcurrency(start, end)─┐
│ 2021-03-03 │ 1 │
│ 2021-03-06 │ 2 │
│ 2021-03-07 │ 3 │
│ 2021-03-11 │ 2 │
└────────────┴────────────────────────────────┘
```
## MACNumToString(num) {#macnumtostringnum}

View File

@ -5,7 +5,11 @@ toc_title: "Таблица"
# CREATE TABLE {#create-table-query}
Запрос `CREATE TABLE` может иметь несколько форм.
Запрос `CREATE TABLE` может иметь несколько форм, которые используются в зависимости от контекста и решаемых задач.
По умолчанию таблицы создаются на текущем сервере. Распределенные DDL запросы создаются с помощью секции `ON CLUSTER`, которая [описана отдельно](../../../sql-reference/distributed-ddl.md).
## Варианты синтаксиса {#syntax-forms}
### С описанием структуры {#with-explicit-schema}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -23,17 +27,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Также могут быть указаны выражения для значений по умолчанию - смотрите ниже.
При необходимости можно указать [первичный ключ](#primary-key) с одним или несколькими ключевыми выражениями.
### Со структурой, аналогичной другой таблице {#with-a-schema-similar-to-other-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine]
```
Создаёт таблицу с такой же структурой, как другая таблица. Можно указать другой движок для таблицы. Если движок не указан, то будет выбран такой же движок, как у таблицы `db2.name2`.
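Небольшой набросок для иллюстрации (имена `existing_table` и `new_table` условные):

``` sql
-- структура и движок берутся из existing_table
CREATE TABLE new_table AS existing_table;

-- то же самое, но с явным указанием другого движка
CREATE TABLE new_table_log AS existing_table ENGINE = Log;
```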
### Из табличной функции {#from-a-table-function}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
```
Создаёт таблицу с такой же структурой и данными, как результат соответствующей табличной функции. Созданная таблица будет работать так же, как и указанная табличная функция.
Создаёт таблицу с такой же структурой и данными, как результат соответствующей табличной функции.
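Примерный набросок с табличной функцией `file` (путь к файлу и структура столбцов здесь условные):

``` sql
CREATE TABLE csv_data AS file('data.csv', 'CSV', 'id UInt32, name String');
SELECT * FROM csv_data LIMIT 5;
```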
### Из запроса SELECT {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
@ -53,7 +63,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
Смотрите также настройку [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable).
### Значения по умолчанию {#create-default-values}
## Значения по умолчанию {#create-default-values}
В описании столбца, может быть указано выражение для значения по умолчанию, одного из следующих видов:
`DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
@ -67,16 +77,22 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
В качестве выражения для умолчания, может быть указано произвольное выражение от констант и столбцов таблицы. При создании и изменении структуры таблицы, проверяется, что выражения не содержат циклов. При INSERT-е проверяется разрешимость выражений - что все столбцы, из которых их можно вычислить, переданы.
### DEFAULT {#default}
`DEFAULT expr`
Обычное значение по умолчанию. Если в запросе INSERT не указан соответствующий столбец, то он будет заполнен путём вычисления соответствующего выражения.
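Небольшой пример (таблица и её столбцы условные): если при вставке не указать `event_date`, он будет вычислен из `event_time`.

``` sql
CREATE TABLE default_example
(
    event_time DateTime,
    event_date Date DEFAULT toDate(event_time)
)
ENGINE = MergeTree()
ORDER BY event_time;

-- столбец event_date будет заполнен значением toDate(event_time)
INSERT INTO default_example (event_time) VALUES (now());
```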
### MATERIALIZED {#materialized}
`MATERIALIZED expr`
Материализованное выражение. Такой столбец не может быть указан при INSERT, то есть, он всегда вычисляется.
При INSERT без указания списка столбцов, такие столбцы не рассматриваются.
Также этот столбец не подставляется при использовании звёздочки в запросе SELECT. Это необходимо, чтобы сохранить инвариант, что дамп, полученный путём `SELECT *`, можно вставить обратно в таблицу INSERT-ом без указания списка столбцов.
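Набросок для иллюстрации (имена условные): материализованный столбец `value_length` всегда вычисляется сервером и не возвращается запросом `SELECT *`.

``` sql
CREATE TABLE materialized_example
(
    value String,
    value_length UInt64 MATERIALIZED length(value)
)
ENGINE = MergeTree()
ORDER BY value;

INSERT INTO materialized_example (value) VALUES ('hello');

-- value_length нужно запрашивать явно
SELECT value, value_length FROM materialized_example;
```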
### ALIAS {#alias}
`ALIAS expr`
Синоним. Такой столбец вообще не хранится в таблице.
@ -118,7 +134,7 @@ PRIMARY KEY(expr1[, expr2,...]);
!!! warning "Предупреждение"
Вы не можете сочетать оба способа в одном запросе.
### Ограничения (constraints) {#constraints}
## Ограничения {#constraints}
Наряду с объявлением столбцов можно объявить ограничения на значения в столбцах таблицы:
@ -136,11 +152,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Добавление большого числа ограничений может негативно повлиять на производительность `INSERT` запросов.
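Пример объявления ограничения (набросок, имена условные); `INSERT` со значением, нарушающим ограничение, завершится исключением:

``` sql
CREATE TABLE constraint_example
(
    id UInt64,
    price Float64,
    CONSTRAINT price_is_non_negative CHECK price >= 0
)
ENGINE = MergeTree()
ORDER BY id;
```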
### Выражение для TTL {#vyrazhenie-dlia-ttl}
## Выражение для TTL {#vyrazhenie-dlia-ttl}
Определяет время хранения значений. Может быть указано только для таблиц семейства MergeTree. Подробнее смотрите в [TTL для столбцов и таблиц](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
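Небольшой набросок с TTL для отдельного столбца и для всей таблицы (интервалы выбраны условно):

``` sql
CREATE TABLE ttl_example
(
    event_time DateTime,
    value UInt64 TTL event_time + INTERVAL 1 MONTH
)
ENGINE = MergeTree()
ORDER BY event_time
TTL event_time + INTERVAL 1 YEAR;
```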
### Кодеки сжатия столбцов {#codecs}
## Кодеки сжатия столбцов {#codecs}
По умолчанию, ClickHouse применяет к столбцу метод сжатия, определённый в [конфигурации сервера](../../../operations/server-configuration-parameters/settings.md). Кроме этого, можно задать метод сжатия для каждого отдельного столбца в запросе `CREATE TABLE`.
@ -182,7 +198,18 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default);
ClickHouse поддерживает кодеки общего назначения и специализированные кодеки.
#### Специализированные кодеки {#create-query-specialized-codecs}
### Кодеки общего назначения {#create-query-common-purpose-codecs}
Кодеки:
- `NONE` — без сжатия.
- `LZ4` — [алгоритм сжатия без потерь](https://github.com/lz4/lz4), используемый по умолчанию. Применяет быстрое сжатие LZ4.
- `LZ4HC[(level)]` — алгоритм LZ4 HC (high compression) с настраиваемым уровнем сжатия. Уровень по умолчанию — 9. Настройка `level <= 0` устанавливает уровень сжатия по умолчанию. Возможные уровни сжатия: \[1, 12\]. Рекомендуемый диапазон уровней: \[4, 9\].
- `ZSTD[(level)]` — [алгоритм сжатия ZSTD](https://en.wikipedia.org/wiki/Zstandard) с настраиваемым уровнем сжатия `level`. Возможные уровни сжатия: \[1, 22\]. Уровень сжатия по умолчанию: 1.
Высокие уровни сжатия полезны для асимметричных сценариев, подобных «один раз сжал, много раз распаковал». Они подразумевают лучшее сжатие, но большее использование CPU.
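Набросок с кодеками общего назначения (выбор кодеков и уровней здесь условный):

``` sql
CREATE TABLE codecs_sketch
(
    id UInt64 CODEC(NONE),
    payload String CODEC(ZSTD(5)),
    metric Float64 CODEC(LZ4HC(9))
)
ENGINE = MergeTree()
ORDER BY id;
```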
### Специализированные кодеки {#create-query-specialized-codecs}
Эти кодеки разработаны для того, чтобы, используя особенности данных, сделать сжатие более эффективным. Некоторые из этих кодеков не сжимают данные самостоятельно. Они готовят данные для кодеков общего назначения, которые сжимают подготовленные данные эффективнее, чем неподготовленные.
@ -203,18 +230,6 @@ CREATE TABLE codec_example
)
ENGINE = MergeTree()
```
#### Кодеки общего назначения {#create-query-common-purpose-codecs}
Кодеки:
- `NONE` — без сжатия.
- `LZ4` — [алгоритм сжатия без потерь](https://github.com/lz4/lz4), используемый по умолчанию. Применяет быстрое сжатие LZ4.
- `LZ4HC[(level)]` — алгоритм LZ4 HC (high compression) с настраиваемым уровнем сжатия. Уровень по умолчанию — 9. Настройка `level <= 0` устанавливает уровень сжатия по умолчанию. Возможные уровни сжатия: \[1, 12\]. Рекомендуемый диапазон уровней: \[4, 9\].
- `ZSTD[(level)]` — [алгоритм сжатия ZSTD](https://en.wikipedia.org/wiki/Zstandard) с настраиваемым уровнем сжатия `level`. Возможные уровни сжатия: \[1, 22\]. Уровень сжатия по умолчанию: 1.
Высокие уровни сжатия полезны для асимметричных сценариев, подобных «один раз сжал, много раз распаковал». Высокие уровни сжатия подразумевают лучшее сжатие, но большее использование CPU.
## Временные таблицы {#vremennye-tablitsy}
ClickHouse поддерживает временные таблицы со следующими характеристиками:
@ -241,6 +256,77 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
Вместо временных можно использовать обычные таблицы с [ENGINE = Memory](../../../engines/table-engines/special/memory.md).
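Минимальный набросок работы с временной таблицей (имя условное); таблица существует только в рамках сессии:

``` sql
CREATE TEMPORARY TABLE tmp_ids (id UInt64);
INSERT INTO tmp_ids VALUES (1), (2), (3);
SELECT count() FROM tmp_ids;
```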
## REPLACE TABLE {#replace-table-query}
Запрос `REPLACE` позволяет частично изменить таблицу (структуру или данные).
!!! note "Замечание"
Такие запросы поддерживаются только движком БД [Atomic](../../../engines/database-engines/atomic.md).
Чтобы удалить часть данных из таблицы, вы можете создать новую таблицу, добавить в нее данные из старой таблицы, которые вы хотите оставить (отобрав их с помощью запроса `SELECT`), затем удалить старую таблицу и переименовать новую таблицу, присвоив ей имя старой:
```sql
CREATE TABLE myNewTable AS myOldTable;
INSERT INTO myNewTable SELECT * FROM myOldTable WHERE CounterID <12345;
DROP TABLE myOldTable;
RENAME TABLE myNewTable TO myOldTable;
```
Вместо перечисленных выше операций можно использовать один запрос:
```sql
REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
```
### Синтаксис
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
Для данного запроса можно использовать любые варианты синтаксиса запроса `CREATE`. Запрос `REPLACE` для несуществующей таблицы вызовет ошибку.
### Примеры
Рассмотрим таблицу:
```sql
CREATE DATABASE base ENGINE = Atomic;
CREATE OR REPLACE TABLE base.t1 (n UInt64, s String) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (1, 'test');
SELECT * FROM base.t1;
```
```text
┌─n─┬─s────┐
│ 1 │ test │
└───┴──────┘
```
Используем запрос `REPLACE` для удаления всех данных:
```sql
CREATE OR REPLACE TABLE base.t1 (n UInt64, s Nullable(String)) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (2, null);
SELECT * FROM base.t1;
```
```text
┌─n─┬─s──┐
│ 2 │ \N │
└───┴────┘
```
Используем запрос `REPLACE` для изменения структуры таблицы:
```sql
REPLACE TABLE base.t1 (n UInt64) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (3);
SELECT * FROM base.t1;
```
```text
┌─n─┐
│ 3 │
└───┘
```
<!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: mysql
# mysql {#mysql}
Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере.
Позволяет выполнять запросы `SELECT` и `INSERT` над данными, хранящимися на удалённом MySQL сервере.
**Синтаксис**
@ -31,7 +31,8 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. Может быть передано только с помощью `replace_query = 0` (если вы одновременно передадите `replace_query = 1` и `on_duplicate_clause`, будет сгенерировано исключение).
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1;`
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`.
Выражения, которые могут использоваться в качестве `on_duplicate_clause` в секции `ON DUPLICATE KEY`, можно посмотреть в документации по [MySQL](http://www.mysql.ru/docs/).
Простые условия `WHERE` такие как `=, !=, >, >=, <, =` выполняются на стороне сервера MySQL.
@ -42,7 +43,7 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
Объект таблицы с теми же столбцами, что и в исходной таблице MySQL.
!!! note "Примечание"
Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком имен столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
**Примеры**
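Например, выборка и вставка данных через табличную функцию `mysql` (адрес сервера, база данных и учётные данные здесь условные):

``` sql
SELECT * FROM mysql('localhost:3306', 'test', 'test_table', 'user', 'password') LIMIT 10;

INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test_table', 'user', 'password') (id, name) VALUES (1, 'example');
```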

View File

@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2
htmlmin==0.1.12
idna==2.10
Jinja2==2.11.2
Jinja2>=2.11.3
jinja2-highlight==0.6.1
jsmin==2.2.2
livereload==2.6.2

View File

@ -1,5 +1,12 @@
---
toc_folder_title: 数据类型
toc_priority: 37
toc_title: 简介
---
# 数据类型 {#data_types}
ClickHouse 可以在数据表中存储多种数据类型。
本节描述 ClickHouse 支持的数据类型,以及使用或者实现它们时(如果有的话)的注意事项。
你可以在系统表 [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) 中检查数据类型名称是否区分大小写。

View File

@ -1,23 +1,25 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 37
toc_title: "\u6587\u4EF6"
toc_title: file
---
# 文件 {#file}
# file {#file}
从文件创建表。 此表函数类似于 [url](url.md) 和 [hdfs](hdfs.md) 一些的。
从文件创建表。 此表函数类似于 [url](../../sql-reference/table-functions/url.md) 和 [hdfs](../../sql-reference/table-functions/hdfs.md)。
`file` 函数可用于对[File](../../engines/table-engines/special/file.md) 表中的数据进行 `SELECT``INSERT` 查询。
**语法**
``` sql
file(path, format, structure)
```
**输入参数**
**参数**
- `path`The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). 只读模式下的globs后的文件支持路径: `*`, `?`, `{abc,def}``{N..M}` 哪里 `N`, `M` — numbers, \``'abc', 'def'` — strings.
- `format` The [格式](../../interfaces/formats.md#formats) 的文件
- `structure`Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `path` — [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path)中文件的相对路径。在只读模式下,文件路径支持以下通配符: `*`, `?`, `{abc,def}``{N..M}`,其中 `N`, `M` 是数字, \``'abc', 'def'` 是字符串。
- `format`文件的[格式](../../interfaces/formats.md#formats)
- `structure`表的结构。格式 `'column1_name column1_type, column2_name column2_type, ...'`
**返回值**
@ -25,7 +27,7 @@ file(path, format, structure)
**示例**
设置 `user_files_path` 和文件的内容 `test.csv`:
设置 `user_files_path` 和文件 `test.csv` 的内容:
``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
@ -37,12 +39,10 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45
```
从`test.csv` 并从中选择前两行:
`test.csv` 中的表中获取数据,并从中选择前两行:
``` sql
SELECT *
FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
```
``` text
@ -52,25 +52,40 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
从CSV文件获取包含3列 [UInt32](../../sql-reference/data-types/int-uint.md) 类型的表的前10行:
``` sql
-- getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
```
**路径中的水珠**
将文件中的数据插入表中:
多个路径组件可以具有globs。 对于正在处理的文件应该存在并匹配到整个路径模式(不仅后缀或前缀)。
``` sql
INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1);
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
- `*` — Substitutes any number of any characters except `/` 包括空字符串。
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
│ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘
```
建筑与 `{}` 类似于 [远程表功能](../../sql-reference/table-functions/remote.md)).
**路径中的通配符**
多个路径组件可以具有通配符。 对于要处理的文件必须存在并与整个路径模式匹配(不仅后缀或前缀)。
- `*` — 替换任意数量的任何字符,除了 `/` 包括空字符串。
- `?` — 替换任何单个字符。
- `{some_string,another_string,yet_another_one}` — 替换任何字符串 `'some_string', 'another_string', 'yet_another_one'`
- `{N..M}` — 替换范围从N到M的任何数字包括两个边界
使用 `{}` 的构造类似于 [remote](../../sql-reference/table-functions/remote.md))表函数。
**示例**
1. 假设我们有几个具有以下相对路径的文件:
假设我们有几个文件,这些文件具有以下相对路径:
- some_dir/some_file_1
- some_dir/some_file_2
@ -79,18 +94,14 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
- another_dir/some_file_2
- another_dir/some_file_3
1. 查询这些文件中的行数:
<!-- -->
查询这些文件中的行数:
``` sql
SELECT count(*)
FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
```
1. 查询这两个目录的所有文件中的行数:
<!-- -->
查询这两个目录的所有文件中的行数:
``` sql
SELECT count(*)
@ -98,11 +109,11 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```
!!! warning "警告"
如果您的文件列表包含带前导零的数字范围,请单独使用带大括号的构造或使用 `?`.
如果您的文件列表包含带前导零的数字范围,请对每个数字分别使用带有大括号的结构或使用 `?`
**示例**
从名为 `file000`, `file001`, … , `file999`:
从名为 `file000`, `file001`, … , `file999`的文件中查询数据:
``` sql
SELECT count(*)
@ -111,8 +122,8 @@ FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
## 虚拟列 {#virtual-columns}
- `_path`Path to the file.
- `_file`Name of the file.
- `_path`文件路径。
- `_file`文件名称。
**另请参阅**

View File

@ -1,15 +1,13 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 47
toc_title: generateRandom
---
# generateRandom {#generaterandom}
使用给定的模式生成随机数据。
允许用数据填充测试表。
支持可以存储在表中的所有数据类型,除了 `LowCardinality``AggregateFunction`.
生成具用给定的模式的随机数据。
允许用数据填充测试表。
支持所有可以存储在表中的数据类型, `LowCardinality``AggregateFunction`除外。
``` sql
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]);
@ -17,15 +15,15 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri
**参数**
- `name`Name of corresponding column.
- `TypeName`Type of corresponding column.
- `max_array_length`Maximum array length for all generated arrays. Defaults to `10`.
- `max_string_length`Maximum string length for all generated strings. Defaults to `10`.
- `random_seed`Specify random seed manually to produce stable results. If NULL — seed is randomly generated.
- `name`对应列的名称。
- `TypeName`对应列的类型。
- `max_array_length`生成数组的最大长度。 默认为10。
- `max_string_length`生成字符串的最大长度。 默认为10。
- `random_seed`手动指定随机种子以产生稳定的结果。 如果为NULL-种子是随机生成的。
**返回值**
具有请求架构的表对象。
具有请求模式的表对象。
## 用法示例 {#usage-example}
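下面是一个简单的示例草稿(列结构与参数仅作演示):

``` sql
SELECT * FROM generateRandom('id UInt32, name String, value Decimal32(4)', 1, 10, 2) LIMIT 3;
```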

View File

@ -1,13 +1,11 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 45
toc_title: hdfs
---
# hdfs {#hdfs}
从HDFS中的文件创建表。 此表函数类似于 [url](url.md) 和 [文件](file.md) 一些的
根据HDFS中的文件创建表。 该表函数类似于 [url](url.md) 和 [文件](file.md)。
``` sql
hdfs(URI, format, structure)
@ -15,9 +13,9 @@ hdfs(URI, format, structure)
**输入参数**
- `URI`The relative URI to the file in HDFS. Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}``{N..M}` 哪里 `N`, `M` — numbers, \``'abc', 'def'` — strings.
- `format`The [格式](../../interfaces/formats.md#formats) 的文件
- `structure`Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `URI`HDFS中文件的相对URI。 在只读模式下,文件路径支持以下通配符: `*`, `?`, `{abc,def}``{N..M}` ,其中 `N`, `M` 是数字, \``'abc', 'def'` 是字符串。
- `format`文件的[格式](../../interfaces/formats.md#formats)
- `structure`表的结构。格式 `'column1_name column1_type, column2_name column2_type, ...'`
**返回值**
@ -25,7 +23,7 @@ hdfs(URI, format, structure)
**示例**
`hdfs://hdfs1:9000/test` 并从中选择前两行:
来自 `hdfs://hdfs1:9000/test` 并从中选择前两行:
``` sql
SELECT *
@ -40,20 +38,20 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**路径中的水珠**
**路径中的通配符**
多个路径组件可以具有globs。 对于正在处理的文件应该存在并匹配到整个路径模式(不仅后缀或前缀)。
多个路径组件可以具有通配符。 对于要处理的文件必须存在并与整个路径模式匹配(不仅后缀或前缀)。
- `*`Substitutes any number of any characters except `/` 包括空字符串。
- `?`Substitutes any single character.
- `{some_string,another_string,yet_another_one}`Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}`Substitutes any number in range from N to M including both borders.
- `*`替换任意数量的任何字符,除了 `/` 包括空字符串。
- `?`替换任何单个字符。
- `{some_string,another_string,yet_another_one}`替换任何字符串 `'some_string', 'another_string', 'yet_another_one'`
- `{N..M}`替换范围从N到M的任何数字包括两个边界
建筑与 `{}` 类似于 [远程表功能](../../sql-reference/table-functions/remote.md)).
使用 `{}` 的构造类似于 [remote](../../sql-reference/table-functions/remote.md))表函数。
**示例**
1. 假设我们在HDFS上有几个具有以下Uri的文件:
1. 假设我们在HDFS上有几个带有以下URI的文件:
- hdfs://hdfs1:9000/some_dir/some_file_1
- hdfs://hdfs1:9000/some_dir/some_file_2
@ -62,7 +60,7 @@ LIMIT 2
- hdfs://hdfs1:9000/another_dir/some_file_2
- hdfs://hdfs1:9000/another_dir/some_file_3
1. 查询这些文件中的行数:
2. 查询这些文件中的行数:
<!-- -->
@ -71,7 +69,7 @@ SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
```
1. 查询这两个目录的所有文件中的行数:
3. 查询这两个目录的所有文件中的行数:
<!-- -->
@ -81,11 +79,11 @@ FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value U
```
!!! warning "警告"
如果您的文件列表包含带前导零的数字范围,请单独使用带大括号的构造或使用 `?`.
如果您的文件列表包含带前导零的数字范围,请对每个数字分别使用带有大括号的结构或使用 `?`
**示例**
从名为 `file000`, `file001`, … , `file999`:
从名为 `file000`, `file001`, … , `file999`的文件中查询数据:
``` sql
SELECT count(*)
@ -94,8 +92,8 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
## 虚拟列 {#virtual-columns}
- `_path`Path to the file.
- `_file`Name of the file.
- `_path`文件路径。
- `_file`文件名称。
**另请参阅**

View File

@ -1,38 +1,35 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u8868\u51FD\u6570"
toc_priority: 34
toc_title: "\u5BFC\u8A00"
---
# 表函数 {#table-functions}
表函数是构造表的方法。
表函数是用来构造表的方法。
您可以使用表函数:
您可以在以下位置使用表函数:
- [FROM](../statements/select/from.md) 《公约》条款 `SELECT` 查询
- `SELECT` 查询的[FROM](../../sql-reference/statements/select/from.md)子句
The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes.
创建临时表的方法,该临时表仅在当前查询中可用。当查询完成后,该临时表将被删除。
- [创建表为\<table_function()\>](../statements/create.md#create-table-query) 查询。
- [CREATE TABLE AS \<table_function()\>](../statements/create.md#create-table-query) 查询。
It's one of the methods of creating a table.
这是创建表的方法之一。
!!! warning "警告"
你不能使用表函数,如果 [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) 设置被禁用。
如果 [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) 设置被禁用,则不能使用表函数
| 功能 | 产品描述 |
|--------------------|--------------------------------------------------------------------------------------------------------|
| [文件](file.md) | 创建一个 [文件](../../engines/table-engines/special/file.md)-发动机表。 |
| [合并](merge.md) | 创建一个 [合并](../../engines/table-engines/special/merge.md)-发动机表。 |
| [数字](numbers.md) | 创建一个包含整数填充的单列的表。 |
| [远程](remote.md) | 允许您访问远程服务器,而无需创建 [分布](../../engines/table-engines/special/distributed.md)-发动机表。 |
| [url](url.md) | 创建一个 [Url](../../engines/table-engines/special/url.md)-发动机表。 |
| [mysql](mysql.md) | 创建一个 [MySQL](../../engines/table-engines/integrations/mysql.md)-发动机表。 |
| [jdbc](jdbc.md) | 创建一个 [JDBC](../../engines/table-engines/integrations/jdbc.md)-发动机表。 |
| [odbc](odbc.md) | 创建一个 [ODBC](../../engines/table-engines/integrations/odbc.md)-发动机表。 |
| [hdfs](hdfs.md) | 创建一个 [HDFS](../../engines/table-engines/integrations/hdfs.md)-发动机表。 |
| 函数 | 描述 |
|-----------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | 创建一个file引擎表。 |
| [merge](../../sql-reference/table-functions/merge.md) | 创建一个merge引擎表。 |
| [numbers](../../sql-reference/table-functions/numbers.md) | 创建一个单列的表,其中包含整数。 |
| [remote](../../sql-reference/table-functions/remote.md) | 允许您访问远程服务器,而无需创建分布式表。 |
| [url](../../sql-reference/table-functions/url.md) | 创建一个URL引擎表。 |
| [mysql](../../sql-reference/table-functions/mysql.md) | 创建一个MySQL引擎表。 |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | 创建一个JDBC引擎表。 |
| [odbc](../../sql-reference/table-functions/odbc.md) | 创建一个ODBC引擎表。 |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | 创建一个HDFS引擎表。 |
[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/) <!--hide-->

View File

@ -1,33 +1,29 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 46
toc_title: "\u8F93\u5165"
toc_title: input
---
# 输入 {#input}
# input {#input}
`input(structure)` -表功能,允许有效地转换和插入数据发送到
服务器与给定结构的表与另一种结构。
`input(structure)` -表函数,可以有效地将发送给服务器的数据转换为具有给定结构的数据并将其插入到具有其他结构的表中。
`structure` -以下格式发送到服务器的数据结构 `'column1_name column1_type, column2_name column2_type, ...'`.
例如, `'id UInt32, name String'`.
`structure` -发送到服务器的数据结构的格式 `'column1_name column1_type, column2_name column2_type, ...'`
例如, `'id UInt32, name String'`
此功能只能用于 `INSERT SELECT` 查询,只有一次,但其他行为像普通表函数
该函数只能在 `INSERT SELECT` 查询中使用,并且只能使用一次,但在其他方面,行为类似于普通的表函数
(例如,它可以用于子查询等。).
数据可以以任何方式像普通发送 `INSERT` 查询并传递任何可用 [格式](../../interfaces/formats.md#formats)
必须在查询结束时指定(不像普通 `INSERT SELECT`).
数据可以像普通 `INSERT` 查询一样发送,并以必须在查询末尾指定的任何可用[格式](../../interfaces/formats.md#formats)
传递(与普通 `INSERT SELECT`不同)。
这个功能的主要特点是,当服务器从客户端接收数据时,它同时将其转换
根据表达式中的列表 `SELECT` 子句并插入到目标表中。 临时表
不创建所有传输的数据。
该函数的主要特点是,当服务器从客户端接收数据时,它会同时根据 `SELECT` 子句中的表达式列表将其转换,并插入到目标表中。
不会创建包含所有已传输数据的临时表。
**例**
- 让 `test` 表具有以下结构 `(a String, b String)`
和数据 `data.csv` 具有不同的结构 `(col1 String, col2 Date, col3 Int32)`. 查询插入
从数据 `data.csv``test` 同时转换的表如下所示:
并且 `data.csv` 中的数据具有不同的结构 `(col1 String, col2 Date, col3 Int32)`
将数据从 `data.csv` 插入到 `test` 表中,同时进行转换的查询如下所示:
<!-- -->
@ -35,7 +31,7 @@ toc_title: "\u8F93\u5165"
$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT lower(col1), col3 * col3 FROM input('col1 String, col2 Date, col3 Int32') FORMAT CSV";
```
- 如果 `data.csv` 包含相同结构的数据 `test_structure` 作为表 `test` 那么这两个查询是相等的:
- 如果 `data.csv` 包含与表 `test` 相同结构 `test_structure` 的数据,那么这两个查询是相等的:
<!-- -->

View File

@ -1,6 +1,4 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 43
toc_title: jdbc
---
@ -9,10 +7,10 @@ toc_title: jdbc
`jdbc(jdbc_connection_uri, schema, table)` -返回通过JDBC驱动程序连接的表。
此表函数需要单独的 `clickhouse-jdbc-bridge` 程序正在运行。
此表函数需要单独的 `clickhouse-jdbc-bridge` 程序才能运行。
它支持可空类型基于查询的远程表的DDL
**例**
**例**
``` sql
SELECT * FROM jdbc('jdbc:mysql://localhost:3306/?user=root&password=root', 'schema', 'table')

View File

@ -1,14 +1,12 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 38
toc_title: "\u5408\u5E76"
toc_title: merge
---
# 合并 {#merge}
# merge {#merge}
`merge(db_name, 'tables_regexp')` Creates a temporary Merge table. For more information, see the section “Table engines, Merge”.
`merge(db_name, 'tables_regexp')` 创建一个临时Merge表。 有关更多信息,请参见 “Table engines, Merge”。
表结构取自与正则表达式匹配的第一个表。
表结构取自遇到的第一个与正则表达式匹配的表。
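示例草稿(假设数据库 `default` 中存在名称以 `mytable` 开头的表):

``` sql
SELECT * FROM merge('default', '^mytable') LIMIT 10;
```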
[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/merge/) <!--hide-->

View File

@ -1,18 +1,16 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 39
toc_title: "\u6570\u5B57"
toc_title: numbers
---
# 数字 {#numbers}
# numbers {#numbers}
`numbers(N)` Returns a table with the single number 包含从0到N-1的整数的列(UInt64)
`numbers(N, M)` -返回一个表与单 number 包含从N到(N+M-1)的整数的列(UInt64)
`numbers(N)` 返回一个包含单个 number 列(UInt64)的表其中包含从0到N-1的整数
`numbers(N, M)` - 返回一个包含单个 number 列(UInt64)的表其中包含从N到(N+M-1)的整数
类似于 `system.numbers` 表,它可以用于测试和生成连续的值, `numbers(N, M)``system.numbers`.
类似于 `system.numbers` 表,它可以用于测试和生成连续的值, `numbers(N, M)``system.numbers`更有效。
以下查询是等的:
以下查询是等的:
``` sql
SELECT * FROM numbers(10);
@ -20,10 +18,10 @@ SELECT * FROM numbers(0, 10);
SELECT * FROM system.numbers LIMIT 10;
```
例:
例:
``` sql
-- Generate a sequence of dates from 2010-01-01 to 2010-12-31
-- 生成2010-01-01至2010-12-31的日期序列
select toDate('2010-01-01') + number as d FROM numbers(365);
```

View File

@ -1,13 +1,11 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 44
toc_title: odbc
---
# odbc {#table-functions-odbc}
返回通过连接的表 [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity).
返回通过 [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity) 连接的表。
``` sql
odbc(connection_settings, external_database, external_table)
@ -15,23 +13,23 @@ odbc(connection_settings, external_database, external_table)
参数:
- `connection_settings`Name of the section with connection settings in the `odbc.ini` 文件
- `external_database`Name of a database in an external DBMS.
- `external_table`Name of a table in the `external_database`.
- `connection_settings``odbc.ini` 文件中连接设置的部分的名称。
- `external_database`外部DBMS的数据库名。
- `external_table` `external_database` 数据库中的表名。
为了安全地实现ODBC连接ClickHouse使用单独的程序 `clickhouse-odbc-bridge`. 如果直接从ODBC驱动程序加载 `clickhouse-server`驱动程序问题可能会导致ClickHouse服务器崩溃。 ClickHouse自动启动 `clickhouse-odbc-bridge` 当它是必需的。 ODBC桥程序是从相同的软件包作为安装 `clickhouse-server`.
为了安全地实现ODBC连接ClickHouse使用单独的程序 `clickhouse-odbc-bridge`。 如果ODBC驱动程序直接从 `clickhouse-server` 加载,则驱动程序问题可能会导致ClickHouse服务器崩溃。 当需要时,ClickHouse自动启动 `clickhouse-odbc-bridge`。 ODBC桥程序是从与 `clickhouse-server` 相同的软件包安装的。
与字段 `NULL` 外部表中的值将转换为基数据类型的默认值。 例如如果远程MySQL表字段具有 `INT NULL` 键入它将转换为0ClickHouse的默认值 `Int32` 数据类型)。
外部表中字段包含的 `NULL` 值将转换为基本据类型的默认值。 例如如果远程MySQL表字段包含 `INT NULL` 类型则将被转换为0ClickHouse`Int32` 数据类型的默认值)。
## 用法示例 {#usage-example}
**通过ODBC从本地MySQL安装获取数据**
**通过ODBC从本地安装的MySQL获取数据**
此示例检查Ubuntu Linux18.04和MySQL服务器5.7。
这个例子检查Ubuntu Linux18.04和MySQL服务器5.7。
确保安装了unixODBC和MySQL连接器。
确保已经安装了unixODBC和MySQL连接器。
默认情况下如果从软件包安装ClickHouse以用户身份启动 `clickhouse`. 因此您需要在MySQL服务器中创建和配置此用户。
默认情况下如果从软件包安装ClickHouse以用户 `clickhouse` 启动。 因此您需要在MySQL服务器中创建和配置此用户。
``` bash
$ sudo mysql
@ -42,7 +40,7 @@ mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
```
然后配置连接 `/etc/odbc.ini`.
然后`/etc/odbc.ini` 中配置连接。
``` bash
$ cat /etc/odbc.ini
@ -55,7 +53,7 @@ USERNAME = clickhouse
PASSWORD = clickhouse
```
您可以使用 `isql` unixodbc安装中的实用程序。
您可以使用unixODBC安装的 `isql` 实用程序检查连接
``` bash
$ isql -v mysqlconn

View File

@ -1,22 +1,52 @@
# 远程,远程安全 {#remote-remotesecure}
# remote, remoteSecure {#remote-remotesecure}
允许您访问远程服务器,而无需创建 `Distributed`
允许您访问远程服务器,而无需创建 `Distributed`。`remoteSecure` - 与 `remote` 相同,但是会使用加密链接。
签名:
这两个函数都可以在 `SELECT``INSERT` 查询中使用。
语法:
``` sql
remote('addresses_expr', db, table[, 'user'[, 'password']])
remote('addresses_expr', db.table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db, table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password']])
remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
```
`addresses_expr` 代表远程服务器地址的一个表达式。可以只是单个服务器地址。 服务器地址可以是 `host:port``host`。`host` 可以指定为服务器域名或是IPV4或IPV6地址。IPv6地址在方括号中指定。`port` 是远程服务器上的TCP端口。 如果省略端口,则使用服务器配置文件中的 `tcp_port` 默认情况为9000
**参数**
- `addresses_expr` 代表远程服务器地址的一个表达式。可以只是单个服务器地址。 服务器地址可以是 `host:port``host`
`host` 可以指定为服务器名称或是IPV4或IPV6地址。IPv6地址在方括号中指定。
`port` 是远程服务器上的TCP端口。 如果省略端口,则 `remote` 使用服务器配置文件中的 [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) 默认情况为9000`remoteSecure` 使用 [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) 默认情况为9440
!!! important "重要事项"
IPv6地址需要指定端口。
例:
类型: [String](../../sql-reference/data-types/string.md)。
- `db` — 数据库名。类型: [String](../../sql-reference/data-types/string.md)。
- `table` — 表名。类型: [String](../../sql-reference/data-types/string.md)。
- `user` — 用户名。如果未指定用户,则使用 `default` 。类型: [String](../../sql-reference/data-types/string.md)。
- `password` — 用户密码。如果未指定密码,则使用空密码。类型: [String](../../sql-reference/data-types/string.md)。
- `sharding_key` — 分片键以支持在节点之间分布数据。 例如: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`。 类型: [UInt32](../../sql-reference/data-types/int-uint.md)。
**返回值**
来自远程服务器的数据集。
**用法**
使用 `remote` 表函数不如创建 `Distributed` 表更优,因为在这种情况下,将为每个请求重新建立服务器连接。此外,如果设置了主机名,则会解析这些名称,并且在使用各种副本时不会计入错误。在处理大量查询时,始终优先创建 `Distributed` 表,不要使用 `remote` 表函数。
`remote` 表函数可以在以下情况下是有用的:
- 访问特定服务器进行数据比较、调试和测试。
- 在多个ClickHouse集群之间的用户研究目的的查询。
- 手动发出的不频繁分布式请求。
- 每次重新定义服务器集的分布式请求。
**地址**
``` text
example01-01-1
@ -29,8 +59,6 @@ localhost
多个地址可以用逗号分隔。在这种情况下ClickHouse将使用分布式处理因此它将将查询发送到所有指定的地址如具有不同数据的分片
示例:
``` text
example01-01-1,example01-02-1
```
@ -49,30 +77,28 @@ example01-{01..02}-1
如果您有多对大括号,它会生成相应集合的直接乘积。
大括号中的地址和部分地址可以用管道符号(\|)分隔。 在这种情况下,相应的地址集被解释为副本,并且查询将被发送到第一个正常副本。 但是,副本将按照当前[load_balancing](../../operations/settings/settings.md)设置的顺序进行迭代。
示例:
大括号中的地址和部分地址可以用管道符号(\|)分隔。 在这种情况下,相应的地址集被解释为副本,并且查询将被发送到第一个正常副本。 但是,副本将按照当前[load_balancing](../../operations/settings/settings.md)设置的顺序进行迭代。此示例指定两个分片,每个分片都有两个副本:
``` text
example01-{01..02}-{1|2}
```
此示例指定两个分片,每个分片都有两个副本。
生成的地址数由常量限制。目前这是1000个地址。
使用 `remote` 表函数没有创建一个 `Distributed` 表更优,因为在这种情况下,将为每个请求重新建立服务器连接。此外,如果设置了主机名,则会解析这些名称,并且在使用各种副本时不会计算错误。 在处理大量查询时,始终优先创建 `Distributed` 表,不要使用 `remote` 表功能。
**示例**
`remote` 表函数可以在以下情况下是有用的:
从远程服务器选择数据:
- 访问特定服务器进行数据比较、调试和测试。
- 在多个ClickHouse集群之间的用户研究目的的查询。
- 手动发出的不频繁分布式请求。
- 每次重新定义服务器集的分布式请求。
``` sql
SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3;
```
如果未指定用户, 将会使用`default`。
如果未指定密码,则使用空密码。
将远程服务器中的数据插入表中:
`remoteSecure` - 与 `remote` 相同,但是会使用加密链接。默认端口为配置文件中的[tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure)或9440。
``` sql
CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory;
INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), 'remote_table') VALUES ('test', 42);
SELECT * FROM remote_table;
```
[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/remote/) <!--hide-->

View File

@ -1,26 +1,43 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 41
toc_title: url
---
# url {#url}
`url(URL, format, structure)` -返回从创建的表 `URL` 与给定
`format``structure`.
`url` 函数从 `URL` 创建一个具有给定 `format``structure` 的表。
URL-HTTP或HTTPS服务器地址它可以接受 `GET` 和/或 `POST` 请求
`url` 函数可用于对[URL](../../engines/table-engines/special/url.md)表中的数据进行 `SELECT``INSERT` 的查询中
格式 - [格式](../../interfaces/formats.md#formats) 的数据。
**语法**
结构-表结构 `'UserID UInt64, Name String'` 格式。 确定列名称和类型。
``` sql
url(URL, format, structure)
```
**参数**
- `URL` — HTTP或HTTPS服务器地址它可以接受 `GET``POST` 请求 (对应于 `SELECT``INSERT` 查询)。类型: [String](../../sql-reference/data-types/string.md)。
- `format` — 数据[格式](../../interfaces/formats.md#formats)。类型: [String](../../sql-reference/data-types/string.md)。
- `structure` — 以 `'UserID UInt64, Name String'` 格式的表结构。确定列名和类型。 类型: [String](../../sql-reference/data-types/string.md)。
**返回值**
A table with the specified format and structure and with data from the defined `URL`.
**示例**
获取一个表的前3行该表是从HTTP服务器获取的包含 `String` 和 [UInt32](../../sql-reference/data-types/int-uint.md) 类型的列,以[CSV](../../interfaces/formats.md#csv)格式返回。
``` sql
-- getting the first 3 lines of a table that contains columns of String and UInt32 type from HTTP-server which answers in CSV format.
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
```
`URL` 的数据插入到表中:
``` sql
CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;
INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42);
SELECT * FROM test_table;
```
[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/url/) <!--hide-->

View File

@ -47,6 +47,7 @@
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/ExpressionJIT.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/System/attachSystemTables.h>
@ -830,8 +831,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500);
if (compiled_expression_cache_size)
global_context->setCompiledExpressionCache(compiled_expression_cache_size);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
#endif
/// Set path for format schema files

View File

@ -110,7 +110,7 @@ public:
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = 0; i < batch_size; ++i)
{
if (flags[i])
if (flags[i] && places[i])
add(places[i] + place_offset, columns, i, arena);
}
}
@ -118,6 +118,7 @@ public:
{
nested_function->addBatch(batch_size, places, place_offset, columns, arena, if_argument_pos);
for (size_t i = 0; i < batch_size; ++i)
if (places[i])
(places[i] + place_offset)[size_of_data] = 1;
}
}

View File

@ -267,13 +267,14 @@ public:
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = 0; i < batch_size; ++i)
{
if (flags[i])
if (flags[i] && places[i])
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
}
}
else
{
for (size_t i = 0; i < batch_size; ++i)
if (places[i])
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
}
}
@ -349,6 +350,7 @@ public:
{
size_t next_offset = offsets[i];
for (size_t j = current_offset; j < next_offset; ++j)
if (places[i])
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, j, arena);
current_offset = next_offset;
}

View File

@ -13,12 +13,6 @@
#include <IO/ConnectionTimeouts.h>
namespace ProfileEvents
{
extern const Event DistributedConnectionMissingTable;
extern const Event DistributedConnectionStaleReplica;
}
namespace DB
{

View File

@ -1,8 +1,14 @@
#if defined(OS_LINUX)
#include <Client/HedgedConnections.h>
#include <Common/ProfileEvents.h>
#include <Interpreters/ClientInfo.h>
namespace ProfileEvents
{
extern const Event HedgedRequestsChangeReplica;
}
namespace DB
{
namespace ErrorCodes
@ -321,6 +327,7 @@ HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(As
offset_states[location.offset].replicas[location.index].is_change_replica_timeout_expired = true;
offset_states[location.offset].next_replica_in_process = true;
offsets_queue.push(location.offset);
ProfileEvents::increment(ProfileEvents::HedgedRequestsChangeReplica);
startNewReplica();
}
else
@ -399,11 +406,21 @@ Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & repli
break;
case Protocol::Server::EndOfStream:
/// Check case when we receive EndOfStream before first not empty data packet
/// or positive progress. It may happen if max_parallel_replicas > 1 and
/// there is no way to sample data in this query.
if (offset_states[replica_location.offset].can_change_replica)
disableChangingReplica(replica_location);
finishProcessReplica(replica, false);
break;
case Protocol::Server::Exception:
default:
/// Check case when we receive Exception before first not empty data packet
/// or positive progress. It may happen if max_parallel_replicas > 1 and
/// there is no way to sample data in this query.
if (offset_states[replica_location.offset].can_change_replica)
disableChangingReplica(replica_location);
finishProcessReplica(replica, true);
break;
}

View File

@ -2,10 +2,16 @@
#include <Client/HedgedConnectionsFactory.h>
#include <Common/typeid_cast.h>
#include <Common/ProfileEvents.h>
namespace ProfileEvents
{
extern const Event HedgedRequestsChangeReplica;
}
namespace DB
{
namespace ErrorCodes
{
extern const int ALL_CONNECTION_TRIES_FAILED;
@ -32,6 +38,16 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
HedgedConnectionsFactory::~HedgedConnectionsFactory()
{
/// Stop anything that may be in progress,
/// to avoid interfering with the subsequent connections.
///
/// I.e. some replicas may be in the establishing state,
/// this means that hedged connection is waiting for TablesStatusResponse,
/// and if the connection is not canceled,
/// then the next user of the connection will get TablesStatusResponse,
/// while this is not the expected packet.
stopChoosingReplicas();
pool->updateSharedError(shuffled_pools);
}
@ -219,6 +235,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processEpollEvents(boo
int index = timeout_fd_to_replica_index[event_fd];
replicas[index].change_replica_timeout.reset();
++shuffled_pools[index].slowdown_count;
ProfileEvents::increment(ProfileEvents::HedgedRequestsChangeReplica);
}
else
throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR);

View File

@ -271,17 +271,24 @@ private:
void setImpl(const Key & key, const MappedPtr & mapped, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
auto res = cells.emplace(std::piecewise_construct,
auto [it, inserted] = cells.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple());
Cell & cell = res.first->second;
bool inserted = res.second;
Cell & cell = it->second;
if (inserted)
{
try
{
cell.queue_iterator = queue.insert(queue.end(), key);
}
catch (...)
{
cells.erase(it);
throw;
}
}
else
{
current_size -= cell.size;

View File

@ -97,6 +97,8 @@
M(DistributedConnectionStaleReplica, "") \
M(DistributedConnectionFailAtAll, "Total count when distributed connection fails after all retries finished") \
\
M(HedgedRequestsChangeReplica, "Total count when timeout for changing replica expired in hedged requests.") \
\
M(CompileFunction, "Number of times a compilation of generated LLVM code (to create fused function for complex expressions) was initiated.") \
M(CompiledFunctionExecute, "Number of times a compiled function was executed.") \
M(CompileExpressionsMicroseconds, "Total time spent for compilation of expressions to LLVM code.") \

View File

@ -13,7 +13,7 @@
#define DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC 300
/// Timeouts for hedged requests.
#define DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS 100
#define DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_SEC 2
#define DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_MS 2000
/// Timeout for synchronous request-result protocol call (like Ping or TablesStatus).
#define DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC 5
#define DBMS_DEFAULT_POLL_INTERVAL 10

View File

@ -55,8 +55,8 @@ class IColumn;
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \
M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \
M(Seconds, tcp_keep_alive_timeout, 0, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \
M(Milliseconds, hedged_connection_timeout, DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS, "Connection timeout for establishing connection with replica for Hedged requests", 0) \
M(Seconds, receive_data_timeout, DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_SEC, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \
M(Milliseconds, hedged_connection_timeout_ms, DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS, "Connection timeout for establishing connection with replica for Hedged requests", 0) \
M(Milliseconds, receive_data_timeout_ms, DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_MS, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \
M(Bool, use_hedged_requests, true, "Use hedged requests for distributed queries", 0) \
M(Bool, allow_changing_replica_until_first_data_packet, false, "Allow HedgedConnections to change replica until receiving first data packet", 0) \
M(Milliseconds, queue_max_wait_ms, 0, "The wait time in the request queue, if the number of concurrent requests exceeds the maximum.", 0) \
@ -366,6 +366,9 @@ class IColumn;
M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \
M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \
\
M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \
M(Int64, postgresql_connection_pool_wait_timeout, -1, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \
\
M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \

View File

@ -14,7 +14,7 @@ AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
: output(output_), header(header_)
{
auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag));
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context_));
}
void AddingDefaultBlockOutputStream::write(const Block & block)

View File

@ -174,7 +174,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
if (dag)
{
auto actions = std::make_shared<ExpressionActions>(std::move(dag));
auto actions = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context));
actions->execute(evaluate_block);
}

View File

@ -28,13 +28,13 @@ namespace ErrorCodes
}
PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
ConnectionPtr connection_,
PostgreSQLConnectionHolderPtr connection_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_)
: query_str(query_str_)
, max_block_size(max_block_size_)
, connection(connection_)
, connection(std::move(connection_))
{
description.init(sample_block);
for (const auto idx : ext::range(0, description.sample_block.columns()))
@ -48,7 +48,7 @@ PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
void PostgreSQLBlockInputStream::readPrefix()
{
tx = std::make_unique<pqxx::read_transaction>(*connection);
tx = std::make_unique<pqxx::read_transaction>(connection->conn());
stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}

View File

@ -9,18 +9,17 @@
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <pqxx/pqxx>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
namespace DB
{
using ConnectionPtr = std::shared_ptr<pqxx::connection>;
class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
PostgreSQLBlockInputStream(
ConnectionPtr connection_,
PostgreSQLConnectionHolderPtr connection_,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_);
@ -47,7 +46,7 @@ private:
const UInt64 max_block_size;
ExternalResultDescription description;
ConnectionPtr connection;
PostgreSQLConnectionHolderPtr connection;
std::unique_ptr<pqxx::read_transaction> tx;
std::unique_ptr<pqxx::stream_from> stream;

View File

@ -36,7 +36,7 @@
#if USE_LIBPQXX
#include <Databases/PostgreSQL/DatabasePostgreSQL.h> // Y_IGNORE
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
#endif
namespace DB
@ -246,11 +246,15 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
auto parsed_host_port = parseAddress(host_port, 5432);
/// no connection is made here
auto connection = std::make_shared<PostgreSQLConnection>(
postgres_database_name, parsed_host_port.first, parsed_host_port.second, username, password);
auto connection_pool = std::make_shared<PostgreSQLConnectionPool>(
postgres_database_name,
parsed_host_port.first, parsed_host_port.second,
username, password,
context.getSettingsRef().postgresql_connection_pool_size,
context.getSettingsRef().postgresql_connection_pool_wait_timeout);
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection, use_table_cache);
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
}
#endif

View File

@ -5,7 +5,6 @@
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/StoragePostgreSQL.h>
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
@ -17,6 +16,7 @@
#include <Poco/File.h>
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
#include <Common/quoteString.h>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
namespace DB
@ -40,14 +40,14 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
PostgreSQLConnectionPtr connection_,
PostgreSQLConnectionPoolPtr connection_pool_,
const bool cache_tables_)
: IDatabase(dbname_)
, global_context(context.getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, connection(std::move(connection_))
, connection_pool(std::move(connection_pool_))
, cache_tables(cache_tables_)
{
cleaner_task = context.getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
@ -90,7 +90,8 @@ std::unordered_set<std::string> DatabasePostgreSQL::fetchTablesList() const
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
pqxx::read_transaction tx(*connection->conn());
auto connection = connection_pool->get();
pqxx::read_transaction tx(connection->conn());
for (auto table_name : tx.stream<std::string>(query))
tables.insert(std::get<0>(table_name));
@ -108,7 +109,8 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
"PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name);
}
pqxx::nontransaction tx(*connection->conn());
auto connection = connection_pool->get();
pqxx::nontransaction tx(connection->conn());
try
{
@ -163,13 +165,13 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, const Conte
return StoragePtr{};
auto use_nulls = context.getSettingsRef().external_table_functions_use_nulls;
auto columns = fetchPostgreSQLTableStructure(connection->conn(), doubleQuoteString(table_name), use_nulls);
auto columns = fetchPostgreSQLTableStructure(connection_pool->get(), doubleQuoteString(table_name), use_nulls);
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name), table_name, std::make_shared<PostgreSQLConnection>(*connection),
StorageID(database_name, table_name), table_name, std::make_shared<PostgreSQLConnectionPool>(*connection_pool),
ColumnsDescription{*columns}, ConstraintsDescription{}, context);
if (cache_tables)

View File

@ -15,8 +15,8 @@ namespace DB
{
class Context;
class PostgreSQLConnection;
using PostgreSQLConnectionPtr = std::shared_ptr<PostgreSQLConnection>;
class PostgreSQLConnectionPool;
using PostgreSQLConnectionPoolPtr = std::shared_ptr<PostgreSQLConnectionPool>;
/** Real-time access to table list and table structure from remote PostgreSQL.
@ -34,7 +34,7 @@ public:
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
PostgreSQLConnectionPtr connection_,
PostgreSQLConnectionPoolPtr connection_pool_,
const bool cache_tables_);
String getEngineName() const override { return "PostgreSQL"; }
@ -72,7 +72,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
PostgreSQLConnectionPtr connection;
PostgreSQLConnectionPoolPtr connection_pool;
const bool cache_tables;
mutable Tables cached_tables;

View File

@ -94,7 +94,7 @@ static DataTypePtr convertPostgreSQLDataType(std::string & type, bool is_nullabl
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls)
PostgreSQLConnectionHolderPtr connection, const String & postgres_table_name, bool use_nulls)
{
auto columns = NamesAndTypesList();
@ -113,7 +113,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
try
{
pqxx::read_transaction tx(*connection);
pqxx::read_transaction tx(connection->conn());
pqxx::stream_from stream(tx, pqxx::from_query, std::string_view(query));
std::tuple<std::string, std::string, std::string, uint16_t> row;
@ -133,7 +133,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
{
throw Exception(fmt::format(
"PostgreSQL table {}.{} does not exist",
connection->dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
connection->conn().dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
}
catch (Exception & e)
{

View File

@ -12,7 +12,7 @@ namespace DB
{
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls);
PostgreSQLConnectionHolderPtr connection, const String & postgres_table_name, bool use_nulls);
}

View File

@ -306,14 +306,6 @@ bool DictionaryStructure::isKeySizeFixed() const
return true;
}
size_t DictionaryStructure::getKeySize() const
{
return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i)
{
return running_size + key_i.type->getSizeOfValueInMemory();
});
}
Strings DictionaryStructure::getKeysNames() const
{
if (id)

View File

@ -161,12 +161,12 @@ struct DictionaryStructure final
const DictionaryAttribute & getAttribute(const std::string & attribute_name) const;
const DictionaryAttribute & getAttribute(const std::string & attribute_name, const DataTypePtr & type) const;
Strings getKeysNames() const;
size_t getKeysSize() const;
std::string getKeyDescription() const;
bool isKeySizeFixed() const;
size_t getKeySize() const;
Strings getKeysNames() const;
private:
/// range_min and range_max have to be parsed before this function call

View File

@ -384,41 +384,12 @@ void HashedDictionary::loadData()
{
if (!source_ptr->hasUpdateField())
{
/// atomic since the progress callback is called in parallel
std::atomic<uint64_t> new_size = 0;
auto stream = source_ptr->loadAll();
/// Preallocation can be used only when we know the number of rows; for this we need:
/// - a ClickHouse source
/// - no filtering (i.e. no <where>), since filtering can filter out
/// too many rows, and we may end up allocating memory that will
/// never be used.
bool preallocate = false;
if (const auto & clickhouse_source = dynamic_cast<ClickHouseDictionarySource *>(source_ptr.get()))
{
if (!clickhouse_source->hasWhere())
preallocate = true;
}
if (preallocate)
{
stream->setProgressCallback([&new_size](const Progress & progress)
{
new_size += progress.total_rows_to_read;
});
}
stream->readPrefix();
while (const auto block = stream->read())
{
if (new_size)
{
size_t current_new_size = new_size.exchange(0);
if (current_new_size)
resize(current_new_size);
}
else
resize(block.rows());
blockToAttributes(block);
}

View File

@ -1,4 +1,8 @@
#include "PolygonDictionary.h"
#include <numeric>
#include <cmath>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
@ -8,8 +12,6 @@
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include <numeric>
namespace DB
{
@ -35,63 +37,9 @@ IPolygonDictionary::IPolygonDictionary(
, input_type(input_type_)
, point_type(point_type_)
{
createAttributes();
setup();
loadData();
}
std::string IPolygonDictionary::getTypeName() const
{
return "Polygon";
}
std::string IPolygonDictionary::getKeyDescription() const
{
return dict_struct.getKeyDescription();
}
size_t IPolygonDictionary::getBytesAllocated() const
{
return bytes_allocated;
}
size_t IPolygonDictionary::getQueryCount() const
{
return query_count.load(std::memory_order_relaxed);
}
double IPolygonDictionary::getHitRate() const
{
return 1.0;
}
size_t IPolygonDictionary::getElementCount() const
{
return element_count;
}
double IPolygonDictionary::getLoadFactor() const
{
return 1.0;
}
const IDictionarySource * IPolygonDictionary::getSource() const
{
return source_ptr.get();
}
const DictionaryLifetime & IPolygonDictionary::getLifetime() const
{
return dict_lifetime;
}
const DictionaryStructure & IPolygonDictionary::getStructure() const
{
return dict_struct;
}
bool IPolygonDictionary::isInjective(const std::string &) const
{
return false;
calculateBytesAllocated();
}
ColumnPtr IPolygonDictionary::getColumn(
@ -101,50 +49,101 @@ ColumnPtr IPolygonDictionary::getColumn(
const DataTypes &,
const ColumnPtr & default_values_column) const
{
ColumnPtr result;
const auto requested_key_points = extractPoints(key_columns);
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & attribute = dict_struct.getAttribute(attribute_name, result_type);
bool complex_attribute = attribute.is_nullable || attribute.is_array;
DefaultValueProvider default_value_provider(attribute.null_value, default_values_column);
auto keys_size = key_columns.front()->size();
size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
const auto & attribute_values_column = attributes[attribute_index];
auto result = attribute_values_column->cloneEmpty();
result->reserve(requested_key_points.size());
Field row_value_to_insert;
size_t polygon_index = 0;
if (unlikely(complex_attribute))
{
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
const auto found = find(requested_key_points[requested_key_index], polygon_index);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
attribute_values_column->get(attribute_values_index, row_value_to_insert);
}
else
row_value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
result->insert(row_value_to_insert);
}
}
else
{
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnType = std::conditional_t<
std::is_same_v<AttributeType, String>,
ColumnString,
std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<ValueType>, ColumnVector<AttributeType>>>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
const auto attribute_values_column_typed = typeid_cast<const ColumnType *>(attribute_values_column.get());
if (!attribute_values_column_typed)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "An attribute type should be same as dictionary type");
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
ColumnType & result_column_typed = static_cast<ColumnType &>(*result);
if constexpr (std::is_same_v<AttributeType, String>)
if constexpr (std::is_same_v<ColumnType, ColumnString>)
{
auto column_string = ColumnString::create();
auto * out = column.get();
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
const auto found = find(requested_key_points[requested_key_index], polygon_index);
getItemsImpl<String, StringRef>(
index,
key_columns,
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
default_value_extractor);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
auto data_to_insert = attribute_values_column->getDataAt(attribute_values_index);
result_column_typed.insertData(data_to_insert.data, data_to_insert.size);
}
else
result_column_typed.insert(default_value_provider.getDefaultValue(requested_key_index));
}
}
else
{
auto & out = column->getData();
auto & attribute_data = attribute_values_column_typed->getData();
auto & result_data = result_column_typed.getData();
getItemsImpl<AttributeType, AttributeType>(
index,
key_columns,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
const auto found = find(requested_key_points[requested_key_index], polygon_index);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
auto & item = attribute_data[attribute_values_index];
result_data.emplace_back(item);
}
else
{
row_value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
result_data.emplace_back(row_value_to_insert.template get<NearestFieldType<ValueType>>());
}
}
}
result = std::move(column);
};
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
}
query_count.fetch_add(requested_key_points.size(), std::memory_order_relaxed);
return result;
}
@ -156,75 +155,16 @@ BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_
throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD};
}
template <typename T>
void IPolygonDictionary::appendNullValueImpl(const Field & null_value)
void IPolygonDictionary::setup()
{
null_values.emplace_back(T(null_value.get<NearestFieldType<T>>()));
}
attributes.reserve(dict_struct.attributes.size());
void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Field & null_value)
for (const auto & attribute : dict_struct.attributes)
{
switch (type)
{
case AttributeUnderlyingType::utUInt8:
appendNullValueImpl<UInt8>(null_value);
break;
case AttributeUnderlyingType::utUInt16:
appendNullValueImpl<UInt16>(null_value);
break;
case AttributeUnderlyingType::utUInt32:
appendNullValueImpl<UInt32>(null_value);
break;
case AttributeUnderlyingType::utUInt64:
appendNullValueImpl<UInt64>(null_value);
break;
case AttributeUnderlyingType::utUInt128:
appendNullValueImpl<UInt128>(null_value);
break;
case AttributeUnderlyingType::utInt8:
appendNullValueImpl<Int8>(null_value);
break;
case AttributeUnderlyingType::utInt16:
appendNullValueImpl<Int16>(null_value);
break;
case AttributeUnderlyingType::utInt32:
appendNullValueImpl<Int32>(null_value);
break;
case AttributeUnderlyingType::utInt64:
appendNullValueImpl<Int64>(null_value);
break;
case AttributeUnderlyingType::utFloat32:
appendNullValueImpl<Float32>(null_value);
break;
case AttributeUnderlyingType::utFloat64:
appendNullValueImpl<Float64>(null_value);
break;
case AttributeUnderlyingType::utDecimal32:
appendNullValueImpl<Decimal32>(null_value);
break;
case AttributeUnderlyingType::utDecimal64:
appendNullValueImpl<Decimal64>(null_value);
break;
case AttributeUnderlyingType::utDecimal128:
appendNullValueImpl<Decimal128>(null_value);
break;
case AttributeUnderlyingType::utString:
appendNullValueImpl<String>(null_value);
break;
}
}
auto column = attribute.type->createColumn();
attributes.emplace_back(std::move(column));
void IPolygonDictionary::createAttributes()
{
attributes.resize(dict_struct.attributes.size());
for (size_t i = 0; i < dict_struct.attributes.size(); ++i)
{
const auto & attr = dict_struct.attributes[i];
attribute_index_by_name.emplace(attr.name, i);
appendNullValue(attr.underlying_type, attr.null_value);
if (attr.hierarchical)
if (attribute.hierarchical)
throw Exception{ErrorCodes::TYPE_MISMATCH,
"{}: hierarchical attributes not supported for dictionary of polygonal type",
getDictionaryID().getNameForLogs()};
@ -234,22 +174,20 @@ void IPolygonDictionary::createAttributes()
void IPolygonDictionary::blockToAttributes(const DB::Block & block)
{
const auto rows = block.rows();
element_count += rows;
size_t skip_key_column_offset = 1;
for (size_t i = 0; i < attributes.size(); ++i)
{
const auto & column = block.safeGetByPosition(i + 1);
if (attributes[i])
{
MutableColumnPtr mutated = IColumn::mutate(std::move(attributes[i]));
mutated->insertRangeFrom(*column.column, 0, column.column->size());
attributes[i] = std::move(mutated);
}
else
attributes[i] = column.column;
const auto & block_column = block.safeGetByPosition(i + skip_key_column_offset);
const auto & column = block_column.column;
attributes[i]->assumeMutable()->insertRangeFrom(*column, 0, column->size());
}
/** Multi-polygons can make the real size larger than the row count, but this estimate is better than nothing. */
polygons.reserve(polygons.size() + rows);
ids.reserve(ids.size() + rows);
polygon_index_to_attribute_value_index.reserve(polygon_index_to_attribute_value_index.size() + rows);
const auto & key = block.safeGetByPosition(0).column;
extractPolygons(key);
}
@ -262,114 +200,104 @@ void IPolygonDictionary::loadData()
blockToAttributes(block);
stream->readSuffix();
std::vector<double> areas;
areas.reserve(polygons.size());
/// Correct the polygons, sort them by area, and update polygon_index_to_attribute_value_index after sorting
PaddedPODArray<double> areas;
areas.resize_fill(polygons.size());
std::vector<std::pair<Polygon, size_t>> polygon_ids;
polygon_ids.reserve(polygons.size());
for (size_t i = 0; i < polygons.size(); ++i)
{
auto & polygon = polygons[i];
bg::correct(polygon);
areas.push_back(bg::area(polygon));
areas[i] = bg::area(polygon);
polygon_ids.emplace_back(polygon, i);
}
sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
std::sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
{
return areas[lhs.second] < areas[rhs.second];
});
std::vector<size_t> correct_ids;
correct_ids.reserve(polygon_ids.size());
for (size_t i = 0; i < polygon_ids.size(); ++i)
{
auto & polygon = polygon_ids[i];
correct_ids.emplace_back(ids[polygon.second]);
correct_ids.emplace_back(polygon_index_to_attribute_value_index[polygon.second]);
polygons[i] = polygon.first;
}
ids = correct_ids;
polygon_index_to_attribute_value_index = std::move(correct_ids);
}
void IPolygonDictionary::calculateBytesAllocated()
{
// TODO: Account for the key.
/// The index allocated by the subclass is not counted because it is small relative to the attributes and polygons
for (const auto & column : attributes)
bytes_allocated += column->allocatedBytes();
for (auto & polygon : polygons)
bytes_allocated += bg::num_points(polygon) * sizeof(Point);
}
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns & key_columns)
{
if (key_columns.size() != 2)
throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS};
throw Exception{"Expected two columns of coordinates with type Float64", ErrorCodes::BAD_ARGUMENTS};
const auto * column_x = typeid_cast<const ColumnVector<Float64>*>(key_columns[0].get());
const auto * column_y = typeid_cast<const ColumnVector<Float64>*>(key_columns[1].get());
if (!column_x || !column_y)
throw Exception{"Expected columns of Float64", ErrorCodes::TYPE_MISMATCH};
const auto rows = key_columns.front()->size();
std::vector<Point> result;
result.reserve(rows);
for (const auto row : ext::range(0, rows))
result.emplace_back(column_x->getElement(row), column_y->getElement(row));
{
auto x = column_x->getElement(row);
auto y = column_y->getElement(row);
if (isNaN(x) || isNaN(y))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"PolygonDictionary input point component must not be NaN");
if (isinf(x) || isinf(y))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"PolygonDictionary input point component must not be infinite");
result.emplace_back(x, y);
}
return result;
}
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
std::vector<IPolygonDictionary::Point> points = extractPoints(key_columns);
auto result = ColumnUInt8::create(points.size());
auto& out = result->getData();
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
for (size_t i = 0; i < points.size(); ++i)
{
size_t trash = 0;
out[row] = find(pt, trash);
++row;
}
query_count.fetch_add(row, std::memory_order_relaxed);
return result;
}
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == attribute_index_by_name.end())
throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS};
return it->second;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPolygonDictionary::getItemsImpl(
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto points = extractPoints(key_columns);
using ColVecType = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
using ColType = std::conditional_t<std::is_same<AttributeType, String>::value, ColumnString, ColVecType>;
const auto column = typeid_cast<const ColType *>(attributes[attribute_ind].get());
if (!column)
throw Exception{"An attribute should be a column of its type", ErrorCodes::BAD_ARGUMENTS};
for (const auto i : ext::range(0, points.size()))
{
size_t id = 0;
const auto found = find(points[i], id);
id = ids[id];
if (!found)
{
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
continue;
}
if constexpr (std::is_same<AttributeType, String>::value)
set_value(i, static_cast<OutputType>(column->getDataAt(id)));
else
set_value(i, static_cast<OutputType>(column->getElement(id)));
size_t unused_find_result = 0;
auto & point = points[i];
out[i] = find(point, unused_find_result);
}
query_count.fetch_add(points.size(), std::memory_order_relaxed);
return result;
}
namespace
@ -531,7 +459,7 @@ void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offs
void IPolygonDictionary::extractPolygons(const ColumnPtr & column)
{
Data data = {polygons, ids};
Data data = {polygons, polygon_index_to_attribute_value_index};
Offset offset;
const IColumn * points_collection = nullptr;

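The rewritten getColumn / hasKeys path above boils down to: extract points from the key columns, find the covering polygon for each point, map that polygon back to its source row through polygon_index_to_attribute_value_index, and read the attribute value (or the default) from the per-attribute column. A toy sketch of that lookup, assuming axis-aligned boxes instead of real polygons and a single Float64 attribute; the names here are illustrative, not the real IPolygonDictionary API.

#include <cstddef>
#include <optional>
#include <vector>

struct Point { double x; double y; };
struct Box { double min_x; double min_y; double max_x; double max_y; };   /// stand-in for a polygon

struct ToyPolygonLookup
{
    std::vector<Box> polygons;                                  /// sorted by area, as in loadData()
    std::vector<size_t> polygon_index_to_attribute_value_index; /// polygon -> source row
    std::vector<double> attribute_values;                       /// one value per source row

    /// Stand-in for find(): index of the first polygon covering the point, if any.
    std::optional<size_t> find(Point p) const
    {
        for (size_t i = 0; i < polygons.size(); ++i)
            if (p.x >= polygons[i].min_x && p.x <= polygons[i].max_x
                && p.y >= polygons[i].min_y && p.y <= polygons[i].max_y)
                return i;
        return std::nullopt;
    }

    /// Mirrors the simple-attribute branch of getColumn(): value for the covering polygon, or the default.
    double getValue(Point p, double default_value) const
    {
        if (auto polygon_index = find(p))
            return attribute_values[polygon_index_to_attribute_value_index[*polygon_index]];
        return default_value;
    }
};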
View File

@ -57,27 +57,25 @@ public:
InputType input_type_,
PointType point_type_);
std::string getTypeName() const override;
std::string getTypeName() const override { return "Polygon"; }
std::string getKeyDescription() const;
size_t getBytesAllocated() const override { return bytes_allocated; }
size_t getBytesAllocated() const override;
size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
size_t getQueryCount() const override;
double getHitRate() const override { return 1.0; }
double getHitRate() const override;
size_t getElementCount() const override { return attributes.empty() ? 0 : attributes.front()->size(); }
size_t getElementCount() const override;
double getLoadFactor() const override { return 1.0; }
double getLoadFactor() const override;
const IDictionarySource * getSource() const override { return source_ptr.get(); }
const IDictionarySource * getSource() const override;
const DictionaryStructure & getStructure() const override { return dict_struct; }
const DictionaryStructure & getStructure() const override;
const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
const DictionaryLifetime & getLifetime() const override;
bool isInjective(const std::string & attribute_name) const override;
bool isInjective(const std::string & attribute_name) const override { return dict_struct.getAttribute(attribute_name).injective; }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
@ -106,13 +104,9 @@ protected:
* If true, polygon_index is set to the index of a polygon containing the given point.
* Overridden in different implementations of this interface.
*/
virtual bool find(const Point & point, size_t & id) const = 0;
virtual bool find(const Point & point, size_t & polygon_index) const = 0;
std::vector<Polygon> polygons;
/** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon
* corresponding to the row in which any other attributes for this entry are located.
*/
std::vector<size_t> ids;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
@ -126,7 +120,7 @@ private:
* The polygons serving as keys are extracted into boost types.
* All other values are stored in one column per attribute.
*/
void createAttributes();
void setup();
void blockToAttributes(const Block & block);
void loadData();
@ -135,13 +129,6 @@ private:
/** Checks whether a given attribute exists and returns its index */
size_t getAttributeIndex(const std::string & attribute_name) const;
/** Helper functions to retrieve and instantiate the provided null value of an attribute.
* Since a null value is obligatory for every attribute they are simply appended to null_values defined below.
*/
template <typename T>
void appendNullValueImpl(const Field & null_value);
void appendNullValue(AttributeUnderlyingType type, const Field & value);
/** Helper function for retrieving the value of an attribute by key. */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
@ -150,32 +137,16 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
std::map<std::string, size_t> attribute_index_by_name;
/** A vector of columns storing the values of each attribute. */
Columns attributes;
/** A vector of null values corresponding to each attribute. */
std::vector<std::variant<
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>> null_values;
size_t bytes_allocated = 0;
size_t element_count = 0;
mutable std::atomic<size_t> query_count{0};
/** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon
* corresponding to the row in which any other attributes for this entry are located.
*/
std::vector<size_t> polygon_index_to_attribute_value_index;
/** Extracts a list of polygons from a column according to input_type and point_type.
* The polygons are appended to the dictionary with the corresponding ids.
*/

View File

@ -39,14 +39,14 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionarySimple::clone() const
this->point_type);
}
bool PolygonDictionarySimple::find(const Point & point, size_t & id) const
bool PolygonDictionarySimple::find(const Point & point, size_t & polygon_index) const
{
bool found = false;
for (size_t i = 0; i < polygons.size(); ++i)
{
if (bg::covered_by(point, polygons[i]))
{
id = i;
polygon_index = i;
found = true;
break;
}
@ -90,7 +90,7 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexEach::clone() con
this->max_depth);
}
bool PolygonDictionaryIndexEach::find(const Point & point, size_t & id) const
bool PolygonDictionaryIndexEach::find(const Point & point, size_t & polygon_index) const
{
const auto * cell = grid.find(point.x(), point.y());
if (cell)
@ -100,13 +100,13 @@ bool PolygonDictionaryIndexEach::find(const Point & point, size_t & id) const
size_t unused;
if (buckets[candidate].find(point, unused))
{
id = candidate;
polygon_index = candidate;
return true;
}
}
if (cell->first_covered != FinalCell::kNone)
{
id = cell->first_covered;
polygon_index = cell->first_covered;
return true;
}
}
@ -142,19 +142,19 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexCell::clone() con
this->max_depth);
}
bool PolygonDictionaryIndexCell::find(const Point & point, size_t & id) const
bool PolygonDictionaryIndexCell::find(const Point & point, size_t & polygon_index) const
{
const auto * cell = index.find(point.x(), point.y());
if (cell)
{
if (!(cell->corresponding_ids).empty() && cell->index.find(point, id))
if (!(cell->corresponding_ids).empty() && cell->index.find(point, polygon_index))
{
id = cell->corresponding_ids[id];
polygon_index = cell->corresponding_ids[polygon_index];
return true;
}
if (cell->first_covered != FinalCellWithSlabs::kNone)
{
id = cell->first_covered;
polygon_index = cell->first_covered;
return true;
}
}

View File

@ -27,7 +27,7 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
};
/** A polygon dictionary which generates a recursive grid in order to efficiently cut the number
@ -55,7 +55,7 @@ public:
static constexpr size_t kMaxDepthDefault = 5;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
std::vector<SlabsPolygonIndex> buckets;
GridRoot<FinalCell> grid;
@ -84,7 +84,7 @@ public:
static constexpr size_t kMaxDepthDefault = 5;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
GridRoot<FinalCellWithSlabs> index;

View File

@ -90,7 +90,6 @@ std::vector<Coord> SlabsPolygonIndex::uniqueX(const std::vector<Polygon> & polyg
std::sort(all_x.begin(), all_x.end());
all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end());
LOG_TRACE(log, "Found {} unique x coordinates", all_x.size());
return all_x;
}
@ -112,8 +111,6 @@ void SlabsPolygonIndex::indexBuild(const std::vector<Polygon> & polygons)
/** Total number of edges */
size_t m = all_edges.size();
LOG_TRACE(log, "Just sorted {} edges from all {} polygons", all_edges.size(), polygons.size());
/** Use a custom comparator to fetch edges in right_point order, as in a scanline algorithm */
auto cmp = [](const Edge & a, const Edge & b)
{
@ -180,8 +177,6 @@ void SlabsPolygonIndex::indexBuild(const std::vector<Polygon> & polygons)
}
}
}
LOG_TRACE(log, "Polygon index is built, total_index_edges = {}", total_index_edges);
}
void SlabsPolygonIndex::indexAddRing(const Ring & ring, size_t polygon_id)

View File

@ -73,7 +73,7 @@ public:
private:
/** Returns unique x coordinates among all points */
std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);
static std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);
/** Builds index described above */
void indexBuild(const std::vector<Polygon> & polygons);

View File

@ -75,17 +75,18 @@ namespace ErrorCodes
class FunctionDictHelper
{
public:
explicit FunctionDictHelper(const Context & context_) : context(context_), external_loader(context.getExternalDictionariesLoader()) {}
explicit FunctionDictHelper(const Context & context_) : context(context_) {}
std::shared_ptr<const IDictionaryBase> getDictionary(const String & dictionary_name)
{
String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
auto dict = external_loader.getDictionary(resolved_name);
auto dict = context.getExternalDictionariesLoader().getDictionary(dictionary_name, context);
if (!access_checked)
{
context.checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName());
access_checked = true;
}
return dict;
}
@ -117,16 +118,11 @@ public:
DictionaryStructure getDictionaryStructure(const String & dictionary_name) const
{
String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
auto load_result = external_loader.getLoadResult(resolved_name);
if (!load_result.config)
throw Exception("Dictionary " + backQuote(dictionary_name) + " not found", ErrorCodes::BAD_ARGUMENTS);
return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
return context.getExternalDictionariesLoader().getDictionaryStructure(dictionary_name, context);
}
private:
const Context & context;
const ExternalDictionariesLoader & external_loader;
/// Access cannot have been denied here: if it had been, checkAccess() would have thrown and access_checked would not have been updated.
std::atomic<bool> access_checked = false;
@ -296,10 +292,12 @@ public:
DataTypes types;
auto dictionary_structure = helper.getDictionaryStructure(dictionary_name);
for (auto & attribute_name : attribute_names)
{
/// We're extracting the return type from the dictionary's config, without loading the dictionary.
auto attribute = helper.getDictionaryStructure(dictionary_name).getAttribute(attribute_name);
auto attribute = dictionary_structure.getAttribute(attribute_name);
types.emplace_back(attribute.type);
}

View File

@ -23,8 +23,8 @@ inline ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const S
settings.tcp_keep_alive_timeout,
0,
settings.connect_timeout_with_failover_secure_ms,
settings.hedged_connection_timeout,
settings.receive_data_timeout);
settings.hedged_connection_timeout_ms,
settings.receive_data_timeout_ms);
}
inline ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Context & context)

View File

@ -20,7 +20,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int DUPLICATE_COLUMN;
extern const int UNKNOWN_IDENTIFIER;
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
@ -32,7 +31,7 @@ namespace ErrorCodes
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
{
for (const auto & input : inputs_)
addInput(input.name, input.type, true);
index.push_back(&addInput(input.name, input.type));
}
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
@ -41,7 +40,7 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
{
if (input.column && isColumnConst(*input.column))
{
addInput(input, true);
addInput(input);
/// Here we also add the column.
/// This allows removing an input that is actually constant (after projection).
@ -49,49 +48,34 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
/// without any respect to the header structure. So it is a way to drop a materialized column and use
/// the constant value from the header.
/// We cannot remove such an input right now because input positions are important in some cases.
addColumn(input, true);
index.push_back(&addColumn(input));
}
else
addInput(input.name, input.type, true);
index.push_back(&addInput(input.name, input.type));
}
}
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index)
ActionsDAG::Node & ActionsDAG::addNode(Node node)
{
auto it = index.find(node.result_name);
if (it != index.end() && !can_replace && add_to_index)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
auto & res = nodes.emplace_back(std::move(node));
if (res.type == ActionType::INPUT)
inputs.emplace_back(&res);
if (add_to_index)
index.replace(&res);
return res;
}
ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
{
auto it = index.find(name);
if (it == index.end())
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return **it;
}
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace, bool add_to_index)
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
{
Node node;
node.type = ActionType::INPUT;
node.result_type = std::move(type);
node.result_name = std::move(name);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace, bool add_to_index)
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
{
Node node;
node.type = ActionType::INPUT;
@ -99,10 +83,10 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
node.result_name = std::move(column.name);
node.column = std::move(column.column);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize)
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
{
if (!column.column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
@ -113,30 +97,10 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
node.result_name = std::move(column.name);
node.column = std::move(column.column);
auto * res = &addNode(std::move(node), can_replace, !materialize);
if (materialize)
{
auto & name = res->result_name;
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
res = &addFunction(func_builder_materialize, {res}, {}, true, false);
res = &addAlias(*res, name, true);
return addNode(std::move(node));
}
return *res;
}
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
{
return addAlias(getNode(name), alias, can_replace);
}
ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool can_replace)
const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string alias)
{
Node node;
node.type = ActionType::ALIAS;
@ -145,13 +109,11 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
node.column = child.column;
node.children.emplace_back(&child);
return addNode(std::move(node), can_replace);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name)
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name)
{
auto & child = getNode(source_name);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get());
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
@ -167,37 +129,8 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam
const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context [[maybe_unused]],
bool can_replace)
{
const auto & all_settings = context.getSettingsRef();
settings.max_temporary_columns = all_settings.max_temporary_columns;
settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns;
#if USE_EMBEDDED_COMPILER
settings.compile_expressions = all_settings.compile_expressions;
settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression;
if (!compilation_cache)
compilation_cache = context.getCompiledExpressionCache();
#endif
Inputs children;
children.reserve(argument_names.size());
for (const auto & name : argument_names)
children.push_back(&getNode(name));
return addFunction(function, children, std::move(result_name), can_replace);
}
ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace,
bool add_to_index)
NodeRawConstPtrs children,
std::string result_name)
{
size_t num_arguments = children.size();
@ -211,7 +144,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
for (size_t i = 0; i < num_arguments; ++i)
{
auto & child = *node.children[i];
const auto & child = *node.children[i];
ColumnWithTypeAndName argument;
argument.column = child.column;
@ -229,10 +162,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.function = node.function_base->prepare(arguments);
/// If all arguments are constants and the function is suitable for execution in the 'prepare' stage, execute the function.
/// But if we compile expressions, a compiled version of this function may be placed in the cache,
/// so we don't want to fold non-deterministic functions.
if (all_const && node.function_base->isSuitableForConstantFolding()
&& (!settings.compile_expressions || node.function_base->isDeterministic()))
if (all_const && node.function_base->isSuitableForConstantFolding())
{
size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size();
auto col = node.function->execute(arguments, node.result_type, num_rows, true);
@ -277,9 +207,39 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.result_name = std::move(result_name);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::findInIndex(const std::string & name) const
{
if (const auto * node = tryFindInIndex(name))
return *node;
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", name);
}
const ActionsDAG::Node * ActionsDAG::tryFindInIndex(const std::string & name) const
{
for (const auto & node : index)
if (node->result_name == name)
return node;
return nullptr;
}
void ActionsDAG::addOrReplaceInIndex(const Node & node)
{
for (auto & index_node : index)
{
if (index_node->result_name == node.result_name)
{
index_node = &node;
return;
}
}
index.push_back(&node);
}
NamesAndTypesList ActionsDAG::getRequiredColumns() const
{
@ -331,37 +291,53 @@ std::string ActionsDAG::dumpNames() const
return out.str();
}
void ActionsDAG::removeUnusedActions(const NameSet & required_names)
{
NodeRawConstPtrs required_nodes;
required_nodes.reserve(required_names.size());
NameSet added;
for (const auto & node : index)
{
if (required_names.count(node->result_name) && added.count(node->result_name) == 0)
{
required_nodes.push_back(node);
added.insert(node->result_name);
}
}
if (added.size() < required_names.size())
{
for (const auto & name : required_names)
if (added.count(name) == 0)
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames());
}
index.swap(required_nodes);
removeUnusedActions();
}
void ActionsDAG::removeUnusedActions(const Names & required_names)
{
std::unordered_set<Node *> nodes_set;
std::vector<Node *> required_nodes;
NodeRawConstPtrs required_nodes;
required_nodes.reserve(required_names.size());
std::unordered_map<std::string_view, const Node *> names_map;
for (const auto * node : index)
names_map[node->result_name] = node;
for (const auto & name : required_names)
{
auto it = index.find(name);
if (it == index.end())
auto it = names_map.find(name);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames());
"Unknown column: {}, there are only columns {}", name, dumpDAG());
if (nodes_set.insert(*it).second)
required_nodes.push_back(*it);
}
removeUnusedActions(required_nodes);
}
void ActionsDAG::removeUnusedActions(const std::vector<Node *> & required_nodes)
{
{
Index new_index;
for (auto * node : required_nodes)
new_index.insert(node);
index.swap(new_index);
required_nodes.push_back(it->second);
}
index.swap(required_nodes);
removeUnusedActions();
}
@ -370,10 +346,10 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
std::unordered_set<const Node *> visited_nodes;
std::stack<Node *> stack;
for (auto * node : index)
for (const auto * node : index)
{
visited_nodes.insert(node);
stack.push(node);
stack.push(const_cast<Node *>(node));
}
for (auto & node : nodes)
@ -406,11 +382,11 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
node->children.clear();
}
for (auto * child : node->children)
for (const auto * child : node->children)
{
if (visited_nodes.count(child) == 0)
{
stack.push(child);
stack.push(const_cast<Node *>(child));
visited_nodes.insert(child);
}
}
@ -421,22 +397,29 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
inputs.erase(it, inputs.end());
}
void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes)
void ActionsDAG::addAliases(const NamesWithAliases & aliases)
{
std::vector<Node *> required_nodes;
std::unordered_map<std::string_view, size_t> names_map;
for (size_t i = 0; i < index.size(); ++i)
names_map[index[i]->result_name] = i;
NodeRawConstPtrs required_nodes;
required_nodes.reserve(aliases.size());
for (const auto & item : aliases)
{
auto & child = getNode(item.first);
required_nodes.push_back(&child);
}
auto it = names_map.find(item.first);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", item.first, dumpNames());
result_nodes.reserve(aliases.size());
required_nodes.push_back(index[it->second]);
}
for (size_t i = 0; i < aliases.size(); ++i)
{
const auto & item = aliases[i];
auto * child = required_nodes[i];
const auto * child = required_nodes[i];
if (!item.second.empty() && item.first != item.second)
{
@ -447,32 +430,66 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
node.column = child->column;
node.children.emplace_back(child);
auto & alias = addNode(std::move(node), true);
result_nodes.push_back(&alias);
}
else
result_nodes.push_back(child);
}
child = &addNode(std::move(node));
}
void ActionsDAG::addAliases(const NamesWithAliases & aliases)
auto it = names_map.find(child->result_name);
if (it == names_map.end())
{
std::vector<Node *> result_nodes;
addAliases(aliases, result_nodes);
names_map[child->result_name] = index.size();
index.push_back(child);
}
else
index[it->second] = child;
}
}
void ActionsDAG::project(const NamesWithAliases & projection)
{
std::vector<Node *> result_nodes;
addAliases(projection, result_nodes);
removeUnusedActions(result_nodes);
std::unordered_map<std::string_view, const Node *> names_map;
for (const auto * node : index)
names_map.emplace(node->result_name, node);
index.clear();
index.reserve(projection.size());
for (const auto & item : projection)
{
auto it = names_map.find(item.first);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", item.first, dumpNames());
index.push_back(it->second);
}
for (size_t i = 0; i < projection.size(); ++i)
{
const auto & item = projection[i];
auto & child = index[i];
if (!item.second.empty() && item.first != item.second)
{
Node node;
node.type = ActionType::ALIAS;
node.result_type = child->result_type;
node.result_name = std::move(item.second);
node.column = child->column;
node.children.emplace_back(child);
child = &addNode(std::move(node));
}
}
removeUnusedActions();
projectInput();
settings.projected_output = true;
projected_output = true;
}
bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
{
if (index.contains(column_name))
for (const auto * node : index)
if (node->result_name == column_name)
return true;
for (auto it = nodes.rbegin(); it != nodes.rend(); ++it)
@ -480,7 +497,7 @@ bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
auto & node = *it;
if (node.result_name == column_name)
{
index.replace(&node);
index.push_back(&node);
return true;
}
}
@ -502,7 +519,7 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
col = *it;
index.remove(it);
index.erase(it);
}
/// Check if column is in input.
@ -541,7 +558,9 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
ActionsDAGPtr ActionsDAG::clone() const
{
auto actions = cloneEmpty();
auto actions = std::make_shared<ActionsDAG>();
actions->project_input = project_input;
actions->projected_output = projected_output;
std::unordered_map<const Node *, Node *> copy_map;
@ -556,7 +575,7 @@ ActionsDAGPtr ActionsDAG::clone() const
child = copy_map[child];
for (const auto & node : index)
actions->index.insert(copy_map[node]);
actions->index.push_back(copy_map[node]);
for (const auto & node : inputs)
actions->inputs.push_back(copy_map[node]);
@ -564,16 +583,13 @@ ActionsDAGPtr ActionsDAG::clone() const
return actions;
}
void ActionsDAG::compileExpressions()
{
#if USE_EMBEDDED_COMPILER
if (settings.compile_expressions)
void ActionsDAG::compileExpressions(size_t min_count_to_compile_expression)
{
compileFunctions();
compileFunctions(min_count_to_compile_expression);
removeUnusedActions();
}
#endif
}
std::string ActionsDAG::dumpDAG() const
{
@ -665,23 +681,21 @@ bool ActionsDAG::trivial() const
}
void ActionsDAG::addMaterializingOutputActions()
{
for (auto & node : index)
node = &materializeNode(*node);
}
const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node)
{
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
Index new_index;
std::vector<Node *> index_nodes(index.begin(), index.end());
for (auto * node : index_nodes)
{
auto & name = node->result_name;
node = &addFunction(func_builder_materialize, {node}, {}, true, false);
node = &addAlias(*node, name, true);
new_index.insert(node);
}
index.swap(new_index);
const auto & name = node.result_name;
const auto * func = &addFunction(func_builder_materialize, {&node}, {});
return addAlias(*func, name);
}
ActionsDAGPtr ActionsDAG::makeConvertingActions(
@ -702,7 +716,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
throw Exception("Converting with add_casted_columns supported only for MatchColumnsMode::Name", ErrorCodes::LOGICAL_ERROR);
auto actions_dag = std::make_shared<ActionsDAG>(source);
std::vector<Node *> projection(num_result_columns);
NodeRawConstPtrs projection(num_result_columns);
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
@ -719,8 +733,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
{
const auto & res_elem = result[result_col_num];
Node * src_node = nullptr;
Node * dst_node = nullptr;
const Node * src_node = nullptr;
const Node * dst_node = nullptr;
switch (mode)
{
@ -749,7 +763,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
if (const auto * src_const = typeid_cast<const ColumnConst *>(dst_node->column.get()))
{
if (ignore_constant_values)
dst_node = const_cast<Node *>(&actions_dag->addColumn(res_elem, true));
dst_node = &actions_dag->addColumn(res_elem);
else if (res_const->getField() != src_const->getField())
throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because "
"it is constant but values of constants are different in source and result",
@ -769,22 +783,22 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
column.column = DataTypeString().createColumnConst(0, column.name);
column.type = std::make_shared<DataTypeString>();
auto * right_arg = const_cast<Node *>(&actions_dag->addColumn(std::move(column), true));
auto * left_arg = dst_node;
const auto * right_arg = &actions_dag->addColumn(std::move(column));
const auto * left_arg = dst_node;
FunctionCast::Diagnostic diagnostic = {dst_node->result_name, res_elem.name};
FunctionOverloadResolverPtr func_builder_cast =
std::make_shared<FunctionOverloadResolverAdaptor>(
CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic)));
Inputs children = { left_arg, right_arg };
dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true);
NodeRawConstPtrs children = { left_arg, right_arg };
dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {});
}
if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))
{
Inputs children = {dst_node};
dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}, true);
NodeRawConstPtrs children = {dst_node};
dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {});
}
if (dst_node->result_name != res_elem.name)
@ -805,7 +819,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
}
else
{
dst_node = &actions_dag->addAlias(*dst_node, res_elem.name, true);
dst_node = &actions_dag->addAlias(*dst_node, res_elem.name);
projection[result_col_num] = dst_node;
}
}
@ -815,7 +829,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
}
}
actions_dag->removeUnusedActions(projection);
actions_dag->index.swap(projection);
actions_dag->removeUnusedActions();
actions_dag->projectInput();
return actions_dag;
@ -830,11 +845,12 @@ ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
std::make_shared<FunctionMaterialize>()));
auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
adding_column_action->addAlias(function_node, std::move(column_name), true);
const auto * column_node = &adding_column_action->addColumn(std::move(column));
NodeRawConstPtrs inputs = {column_node};
const auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {});
const auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name));
adding_column_action->index.push_back(&alias_node);
return adding_column_action;
}
@ -848,23 +864,23 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
/// This map contains nodes which should be removed from the `first` index because they are used as inputs for `second`.
/// The second element is the number of removals (because one node may be repeated several times in the result).
std::unordered_map<Node *, size_t> removed_first_result;
std::unordered_map<const Node *, size_t> removed_first_result;
/// Map inputs of `second` to nodes of `first`.
std::unordered_map<Node *, Node *> inputs_map;
std::unordered_map<const Node *, const Node *> inputs_map;
/// Update inputs list.
{
/// The index may have multiple columns with the same name. They may also be used by `second`. Order is important.
std::unordered_map<std::string_view, std::list<Node *>> first_result;
for (auto & node : first.index)
std::unordered_map<std::string_view, std::list<const Node *>> first_result;
for (const auto & node : first.index)
first_result[node->result_name].push_back(node);
for (auto & node : second.inputs)
for (const auto & node : second.inputs)
{
auto it = first_result.find(node->result_name);
if (it == first_result.end() || it->second.empty())
{
if (first.settings.project_input)
if (first.project_input)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot find column {} in ActionsDAG result", node->result_name);
@ -904,50 +920,29 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
}
/// Update index.
if (second.settings.project_input)
if (second.project_input)
{
first.index.swap(second.index);
first.settings.project_input = true;
first.project_input = true;
}
else
{
/// Remove `second` inputs from index.
for (auto it = first.index.begin(); it != first.index.end();)
/// Add the results from the first actions that were not removed.
for (const auto * node : first.index)
{
auto cur = it;
++it;
auto jt = removed_first_result.find(*cur);
if (jt != removed_first_result.end() && jt->second > 0)
{
first.index.remove(cur);
--jt->second;
}
auto it = removed_first_result.find(node);
if (it != removed_first_result.end() && it->second > 0)
--it->second;
else
second.index.push_back(node);
}
for (auto it = second.index.rbegin(); it != second.index.rend(); ++it)
first.index.prepend(*it);
first.index.swap(second.index);
}
first.nodes.splice(first.nodes.end(), std::move(second.nodes));
/// Here we rebuild the index because some string_views from the first map may now point to strings from the second.
ActionsDAG::Index first_index;
for (auto * node : first.index)
first_index.insert(node);
first.index.swap(first_index);
#if USE_EMBEDDED_COMPILER
if (first.compilation_cache == nullptr)
first.compilation_cache = second.compilation_cache;
#endif
first.settings.max_temporary_columns = std::max(first.settings.max_temporary_columns, second.settings.max_temporary_columns);
first.settings.max_temporary_non_const_columns = std::max(first.settings.max_temporary_non_const_columns, second.settings.max_temporary_non_const_columns);
first.settings.min_count_to_compile_expression = std::max(first.settings.min_count_to_compile_expression, second.settings.min_count_to_compile_expression);
first.settings.projected_output = second.settings.projected_output;
first.projected_output = second.projected_output;
/// Drop unused inputs and, probably, some actions.
first.removeUnusedActions();
@ -960,13 +955,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// Split the DAG into two parts.
/// (first_nodes, first_index) is the part whose result will contain split_list.
/// (second_nodes, second_index) is the part whose index is the same as that of the current actions.
std::list<Node> second_nodes;
std::list<Node> first_nodes;
Index second_index;
Index first_index;
Nodes second_nodes;
Nodes first_nodes;
NodeRawConstPtrs second_index;
NodeRawConstPtrs first_index;
/// List of nodes from the current actions which are not inputs but will be in the second part.
std::vector<const Node *> new_inputs;
NodeRawConstPtrs new_inputs;
struct Frame
{
@ -1036,7 +1031,7 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// First, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
auto & child_data = data[child];
if (!child_data.visited)
@ -1124,13 +1119,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
}
}
for (auto * node : index)
second_index.insert(data[node].to_second);
for (const auto * node : index)
second_index.push_back(data[node].to_second);
Inputs second_inputs;
Inputs first_inputs;
NodeRawConstPtrs second_inputs;
NodeRawConstPtrs first_inputs;
for (auto * input : inputs)
for (const auto * input : inputs)
{
const auto & cur = data[input];
first_inputs.push_back(cur.to_first);
@ -1140,15 +1135,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
{
const auto & cur = data[input];
second_inputs.push_back(cur.to_second);
first_index.insert(cur.to_first);
first_index.push_back(cur.to_first);
}
auto first_actions = cloneEmpty();
auto first_actions = std::make_shared<ActionsDAG>();
first_actions->nodes.swap(first_nodes);
first_actions->index.swap(first_index);
first_actions->inputs.swap(first_inputs);
auto second_actions = cloneEmpty();
auto second_actions = std::make_shared<ActionsDAG>();
second_actions->nodes.swap(second_nodes);
second_actions->index.swap(second_index);
second_actions->inputs.swap(second_inputs);
@ -1186,7 +1181,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
/// At first, visit all children. We depend on ARRAY JOIN if any child does.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0)
{
@ -1220,23 +1215,19 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
auto res = split(split_nodes);
/// Do not remove array joined columns if they are not used.
res.first->settings.project_input = false;
res.first->project_input = false;
return res;
}
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
{
auto it = index.begin();
for (; it != index.end(); ++it)
if ((*it)->result_name == column_name)
break;
if (it == index.end())
const auto * node = tryFindInIndex(column_name);
if (!node)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
column_name, dumpDAG());
std::unordered_set<const Node *> split_nodes = {*it};
std::unordered_set<const Node *> split_nodes = {node};
return split(split_nodes);
}
@ -1245,8 +1236,8 @@ namespace
struct ConjunctionNodes
{
std::vector<ActionsDAG::Node *> allowed;
std::vector<ActionsDAG::Node *> rejected;
ActionsDAG::NodeRawConstPtrs allowed;
ActionsDAG::NodeRawConstPtrs rejected;
};
/// Take a node whose result is a predicate.
@ -1256,19 +1247,19 @@ struct ConjunctionNodes
ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set<const ActionsDAG::Node *> allowed_nodes)
{
ConjunctionNodes conjunction;
std::unordered_set<ActionsDAG::Node *> allowed;
std::unordered_set<ActionsDAG::Node *> rejected;
std::unordered_set<const ActionsDAG::Node *> allowed;
std::unordered_set<const ActionsDAG::Node *> rejected;
struct Frame
{
ActionsDAG::Node * node;
const ActionsDAG::Node * node;
bool is_predicate = false;
size_t next_child_to_visit = 0;
size_t num_allowed_children = 0;
};
std::stack<Frame> stack;
std::unordered_set<ActionsDAG::Node *> visited_nodes;
std::unordered_set<const ActionsDAG::Node *> visited_nodes;
stack.push(Frame{.node = predicate, .is_predicate = true});
visited_nodes.insert(predicate);
@ -1282,7 +1273,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
/// At first, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0)
{
@ -1305,7 +1296,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
}
else if (is_conjunction)
{
for (auto * child : cur.node->children)
for (const auto * child : cur.node->children)
{
if (allowed_nodes.count(child))
{
@ -1335,7 +1326,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
return conjunction;
}
ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::Node *> nodes)
ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPtrs & nodes)
{
ColumnsWithTypeAndName arguments;
arguments.reserve(nodes.size());
@ -1360,21 +1351,20 @@ ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::No
///
/// Result actions add a single column with the conjunction result (it is always last in the index).
/// No other columns are added or removed.
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunction, const ColumnsWithTypeAndName & all_inputs)
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs)
{
if (conjunction.empty())
return nullptr;
auto actions = cloneEmpty();
actions->settings.project_input = false;
auto actions = std::make_shared<ActionsDAG>();
FunctionOverloadResolverPtr func_builder_and =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionAnd>()));
std::unordered_map<const ActionsDAG::Node *, ActionsDAG::Node *> nodes_mapping;
std::unordered_map<std::string, std::list<Node *>> required_inputs;
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> nodes_mapping;
std::unordered_map<std::string, std::list<const Node *>> required_inputs;
struct Frame
{
@ -1397,7 +1387,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
/// At first, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (nodes_mapping.count(child) == 0)
{
@ -1424,14 +1414,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
}
}
/// Actions must have the same inputs as in all_inputs list.
/// See comment to cloneActionsForFilterPushDown.
for (const auto & col : all_inputs)
{
Node * input;
const Node * input;
auto & list = required_inputs[col.name];
if (list.empty())
input = &const_cast<Node &>(actions->addInput(col, true, false));
input = &actions->addInput(col);
else
{
input = list.front();
@ -1439,22 +1427,22 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
actions->inputs.push_back(input);
}
actions->index.insert(input);
actions->index.push_back(input);
}
Node * result_predicate = nodes_mapping[*conjunction.begin()];
const Node * result_predicate = nodes_mapping[*conjunction.begin()];
if (conjunction.size() > 1)
{
std::vector<Node *> args;
NodeRawConstPtrs args;
args.reserve(conjunction.size());
for (const auto * predicate : conjunction)
args.emplace_back(nodes_mapping[predicate]);
result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false);
result_predicate = &actions->addFunction(func_builder_and, std::move(args), {});
}
actions->index.insert(result_predicate);
actions->index.push_back(result_predicate);
return actions;
}
@ -1464,22 +1452,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
const Names & available_inputs,
const ColumnsWithTypeAndName & all_inputs)
{
Node * predicate;
{
auto it = index.begin();
for (; it != index.end(); ++it)
if ((*it)->result_name == filter_name)
break;
if (it == index.end())
Node * predicate = const_cast<Node *>(tryFindInIndex(filter_name));
if (!predicate)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
filter_name, dumpDAG());
predicate = *it;
}
std::unordered_set<const Node *> allowed_nodes;
/// Get input nodes from available_inputs names.
@ -1516,7 +1494,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
{
if (*i == predicate)
{
index.remove(i);
index.erase(i);
break;
}
}
@ -1539,7 +1517,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
/// The predicate is a conjunction where both the allowed and rejected sets are non-empty.
/// Replace this node with a conjunction of the rejected predicates.
std::vector<Node *> new_children(conjunction.rejected.begin(), conjunction.rejected.end());
NodeRawConstPtrs new_children = std::move(conjunction.rejected);
if (new_children.size() == 1)
{
@ -1564,8 +1542,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
node.column = DataTypeString().createColumnConst(0, node.result_name);
node.result_type = std::make_shared<DataTypeString>();
auto * right_arg = &nodes.emplace_back(std::move(node));
auto * left_arg = new_children.front();
const auto * right_arg = &nodes.emplace_back(std::move(node));
const auto * left_arg = new_children.front();
predicate->children = {left_arg, right_arg};
auto arguments = prepareFunctionArguments(predicate->children);
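The file above decomposes a filter into its top-level AND terms and pushes down only the terms computable from the allowed inputs, keeping the rest in the original filter. The following standalone toy sketch (not ClickHouse code; all names hypothetical) illustrates that partitioning idea with plain containers:

```cpp
// Toy model of the conjunction split performed by getConjunctionNodes /
// cloneActionsForFilterPushDown: a term can be pushed down only if every
// column it reads is available below the filter.
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Term
{
    std::string name;               // e.g. "a > 1"
    std::set<std::string> inputs;   // columns the term reads
};

int main()
{
    std::set<std::string> allowed_inputs = {"a", "b"};   // columns available below the filter
    std::vector<Term> conjunction = {
        {"a > 1", {"a"}},
        {"b = c", {"b", "c"}},      // depends on "c", which is not available below
        {"b < 10", {"b"}},
    };

    std::vector<Term> pushed_down;  // analogue of ConjunctionNodes::allowed
    std::vector<Term> rejected;     // analogue of ConjunctionNodes::rejected

    for (const auto & term : conjunction)
    {
        bool all_allowed = true;
        for (const auto & input : term.inputs)
            all_allowed = all_allowed && allowed_inputs.count(input) > 0;
        (all_allowed ? pushed_down : rejected).push_back(term);
    }

    for (const auto & term : pushed_down)
        std::cout << "push down: " << term.name << '\n';
    for (const auto & term : rejected)
        std::cout << "keep:      " << term.name << '\n';
}
```

With the sample data this prints that `a > 1` and `b < 10` can be pushed down, while `b = c` is kept, mirroring how the real code rebuilds the remaining predicate from the rejected terms.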

View File

@ -26,7 +26,6 @@ using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class Context;
class CompiledExpressionCache;
/// Directed acyclic graph of expressions.
@ -55,9 +54,13 @@ public:
FUNCTION,
};
struct Node;
using NodeRawPtrs = std::vector<Node *>;
using NodeRawConstPtrs = std::vector<const Node *>;
struct Node
{
std::vector<Node *> children;
NodeRawConstPtrs children;
ActionType type;
@ -79,128 +82,18 @@ public:
bool allow_constant_folding = true;
};
/// Index is used to:
/// * find a Node by its result_name
/// * specify the order of columns in the result
/// It represents a set of available columns.
/// Removing a column from the index is equivalent to removing it from the final result.
///
/// DAG allows actions with duplicate result names. In this case the index will point to the last added Node.
/// It does not cause any problems as long as execution of actions no longer depends on action names.
///
/// Index is a list of nodes + [map: name -> list::iterator].
/// The list is ordered and may contain nodes with the same name, or one node several times.
class Index
{
private:
std::list<Node *> list;
/// The map key is a string_view into Node::result_name of the node stored in the value.
/// The map always points to an existing node, so the key is always valid (nodes live longer than the index).
std::unordered_map<std::string_view, std::list<Node *>::iterator> map;
public:
auto size() const { return list.size(); }
bool contains(std::string_view key) const { return map.count(key) != 0; }
std::list<Node *>::iterator begin() { return list.begin(); }
std::list<Node *>::iterator end() { return list.end(); }
std::list<Node *>::const_iterator begin() const { return list.begin(); }
std::list<Node *>::const_iterator end() const { return list.end(); }
std::list<Node *>::const_reverse_iterator rbegin() const { return list.rbegin(); }
std::list<Node *>::const_reverse_iterator rend() const { return list.rend(); }
std::list<Node *>::const_iterator find(std::string_view key) const
{
auto it = map.find(key);
if (it == map.end())
return list.end();
return it->second;
}
/// The insert method doesn't check whether the map already has a node with the same name.
/// If a node with the same name exists, it is removed from the map, but not from the list.
/// This is expected and used for project(), when the result may have several columns with the same name.
void insert(Node * node)
{
auto it = list.emplace(list.end(), node);
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
handle.mapped() = it;
map.insert(std::move(handle));
}
else
map[node->result_name] = it;
}
void prepend(Node * node)
{
auto it = list.emplace(list.begin(), node);
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
handle.mapped() = it;
map.insert(std::move(handle));
}
else
map[node->result_name] = it;
}
/// If node with same name exists in index, replace it. Otherwise insert new node to index.
void replace(Node * node)
{
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
*handle.mapped() = node;
map.insert(std::move(handle));
}
else
insert(node);
}
void remove(std::list<Node *>::iterator it)
{
auto map_it = map.find((*it)->result_name);
if (map_it != map.end() && map_it->second == it)
map.erase(map_it);
list.erase(it);
}
void swap(Index & other)
{
list.swap(other.list);
map.swap(other.map);
}
};
/// NOTE: std::list is an implementation detail.
/// It allows adding and removing nodes in place without reallocation.
/// Raw pointers to nodes remain valid.
using Nodes = std::list<Node>;
using Inputs = std::vector<Node *>;
struct ActionsSettings
{
size_t max_temporary_columns = 0;
size_t max_temporary_non_const_columns = 0;
size_t min_count_to_compile_expression = 0;
bool compile_expressions = false;
bool project_input = false;
bool projected_output = false;
};
private:
Nodes nodes;
Index index;
Inputs inputs;
NodeRawConstPtrs index;
NodeRawConstPtrs inputs;
ActionsSettings settings;
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compilation_cache;
#endif
bool project_input = false;
bool projected_output = false;
public:
ActionsDAG() = default;
@ -211,8 +104,8 @@ public:
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
const Nodes & getNodes() const { return nodes; }
const Index & getIndex() const { return index; }
const Inputs & getInputs() const { return inputs; }
const NodeRawConstPtrs & getIndex() const { return index; }
const NodeRawConstPtrs & getInputs() const { return inputs; }
NamesAndTypesList getRequiredColumns() const;
ColumnsWithTypeAndName getResultColumns() const;
@ -222,19 +115,26 @@ public:
std::string dumpNames() const;
std::string dumpDAG() const;
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false, bool add_to_index = true);
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false, bool add_to_index = true);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
const Node & addInput(std::string name, DataTypePtr type);
const Node & addInput(ColumnWithTypeAndName column);
const Node & addColumn(ColumnWithTypeAndName column);
const Node & addAlias(const Node & child, std::string alias);
const Node & addArrayJoin(const Node & child, std::string result_name);
const Node & addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context,
bool can_replace = false);
NodeRawConstPtrs children,
std::string result_name);
void addNodeToIndex(const Node * node) { index.insert(const_cast<Node *>(node)); }
/// Index can contain any column returned from DAG.
/// You may manually change it if needed.
NodeRawConstPtrs & getIndex() { return index; }
/// Find the first column by name in the index. This search is linear.
const Node & findInIndex(const std::string & name) const;
/// Same, but returns nullptr if the node is not found.
const Node * tryFindInIndex(const std::string & name) const;
/// Find the first node with the same name in the index and replace it.
/// If it was not found, add the node to the end of the index.
void addOrReplaceInIndex(const Node & node);
/// Call addAlias several times.
void addAliases(const NamesWithAliases & aliases);
@ -248,16 +148,20 @@ public:
/// Return true if column was removed from inputs.
bool removeUnusedResult(const std::string & column_name);
void projectInput(bool project = true) { settings.project_input = project; }
void projectInput(bool project = true) { project_input = project; }
bool isInputProjected() const { return project_input; }
bool isOutputProjected() const { return projected_output; }
void removeUnusedActions(const Names & required_names);
void removeUnusedActions(const NameSet & required_names);
bool hasArrayJoin() const;
bool hasStatefulFunctions() const;
bool trivial() const; /// If actions has no functions or array join.
const ActionsSettings & getSettings() const { return settings; }
void compileExpressions();
#if USE_EMBEDDED_COMPILER
void compileExpressions(size_t min_count_to_compile_expression);
#endif
ActionsDAGPtr clone() const;
@ -265,6 +169,9 @@ public:
/// Also add aliases so the result names remain unchanged.
void addMaterializingOutputActions();
/// Apply materialize() function to node. Result node has the same name.
const Node & materializeNode(const Node & node);
enum class MatchColumnsMode
{
/// Require same number of columns in source and result. Match columns by corresponding positions, regardless to names.
@ -334,36 +241,15 @@ public:
const ColumnsWithTypeAndName & all_inputs);
private:
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
Node & getNode(const std::string & name);
Node & addNode(Node node);
Node & addAlias(Node & child, std::string alias, bool can_replace);
Node & addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace,
bool add_to_index = true);
ActionsDAGPtr cloneEmpty() const
{
auto actions = std::make_shared<ActionsDAG>();
actions->settings = settings;
void removeUnusedActions(bool allow_remove_inputs = true);
#if USE_EMBEDDED_COMPILER
actions->compilation_cache = compilation_cache;
void compileFunctions(size_t min_count_to_compile_expression);
#endif
return actions;
}
void removeUnusedActions(const std::vector<Node *> & required_nodes);
void removeUnusedActions(bool allow_remove_inputs = true);
void addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes);
void compileFunctions();
ActionsDAGPtr cloneActionsForConjunction(std::vector<Node *> conjunction, const ColumnsWithTypeAndName & all_inputs);
static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs);
};
}
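The header above replaces the old Index class with a flat NodeRawConstPtrs plus the linear lookup helpers findInIndex/tryFindInIndex/addOrReplaceInIndex. A minimal usage sketch of the new lookup API follows; the helper function is hypothetical and the snippet assumes it is compiled inside the ClickHouse tree with the declarations from this diff:

```cpp
// Hypothetical helper, only meant to illustrate the new index API declared above.
#include <Interpreters/ActionsDAG.h>

namespace DB
{

/// Return the filter node if the DAG exposes it, nullptr otherwise.
const ActionsDAG::Node * findFilterNode(const ActionsDAG & dag, const std::string & filter_name)
{
    /// tryFindInIndex scans the index linearly and returns nullptr for a missing name;
    /// findInIndex would throw instead, which callers in this commit use when absence is a logical error.
    return dag.tryFindInIndex(filter_name);
}

}
```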

View File

@ -52,6 +52,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int INCORRECT_ELEMENT_OF_SET;
extern const int BAD_ARGUMENTS;
extern const int DUPLICATE_COLUMN;
}
static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
@ -346,11 +347,9 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
const auto & index = actions.getIndex();
auto it = index.find(left_arg->getColumnName());
if (it == index.end())
throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
const DataTypePtr & left_arg_type = (*it)->result_type;
auto column_name = left_arg->getColumnName();
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
DataTypes set_element_types = {left_arg_type};
const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
@ -381,6 +380,54 @@ SetPtr makeExplicitSet(
return set;
}
ScopeStack::Level::~Level() = default;
ScopeStack::Level::Level() = default;
ScopeStack::Level::Level(Level &&) = default;
class ScopeStack::Index
{
/// Map column name -> Node.
/// Use string_view as key which always points to Node::result_name.
std::unordered_map<std::string_view, const ActionsDAG::Node *> map;
ActionsDAG::NodeRawConstPtrs & index;
public:
explicit Index(ActionsDAG::NodeRawConstPtrs & index_) : index(index_)
{
for (const auto * node : index)
map.emplace(node->result_name, node);
}
void addNode(const ActionsDAG::Node * node)
{
bool inserted = map.emplace(node->result_name, node).second;
if (!inserted)
throw Exception("Column '" + node->result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
index.push_back(node);
}
const ActionsDAG::Node * tryGetNode(const std::string & name) const
{
auto it = map.find(name);
if (it == map.end())
return nullptr;
return it->second;
}
const ActionsDAG::Node & getNode(const std::string & name) const
{
const auto * node = tryGetNode(name);
if (!node)
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return *node;
}
bool contains(const std::string & name) const { return map.count(name) > 0; }
};
ActionsMatcher::Data::Data(
const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
const NamesAndTypesList & source_columns_, ActionsDAGPtr actions_dag,
@ -404,7 +451,7 @@ ActionsMatcher::Data::Data(
bool ActionsMatcher::Data::hasColumn(const String & column_name) const
{
return actions_stack.getLastActions().getIndex().contains(column_name);
return actions_stack.getLastActionsIndex().contains(column_name);
}
ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, const Context & context_)
@ -412,6 +459,7 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, const Context & context_)
{
auto & level = stack.emplace_back();
level.actions_dag = std::move(actions_dag);
level.index = std::make_unique<ScopeStack::Index>(level.actions_dag->getIndex());
for (const auto & node : level.actions_dag->getIndex())
if (node->type == ActionsDAG::ActionType::INPUT)
@ -422,20 +470,23 @@ void ScopeStack::pushLevel(const NamesAndTypesList & input_columns)
{
auto & level = stack.emplace_back();
level.actions_dag = std::make_shared<ActionsDAG>();
level.index = std::make_unique<ScopeStack::Index>(level.actions_dag->getIndex());
const auto & prev = stack[stack.size() - 2];
for (const auto & input_column : input_columns)
{
level.actions_dag->addInput(input_column.name, input_column.type);
const auto & node = level.actions_dag->addInput(input_column.name, input_column.type);
level.index->addNode(&node);
level.inputs.emplace(input_column.name);
}
const auto & index = level.actions_dag->getIndex();
for (const auto & node : prev.actions_dag->getIndex())
{
if (!index.contains(node->result_name))
level.actions_dag->addInput({node->column, node->result_type, node->result_name});
if (!level.index->contains(node->result_name))
{
const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name});
level.index->addNode(&input);
}
}
}
@ -448,10 +499,8 @@ size_t ScopeStack::getColumnLevel(const std::string & name)
if (stack[i].inputs.count(name))
return i;
const auto & index = stack[i].actions_dag->getIndex();
auto it = index.find(name);
if (it != index.end() && (*it)->type != ActionsDAG::ActionType::INPUT)
const auto * node = stack[i].index->tryGetNode(name);
if (node && node->type != ActionsDAG::ActionType::INPUT)
return i;
}
@ -461,32 +510,46 @@ size_t ScopeStack::getColumnLevel(const std::string & name)
void ScopeStack::addColumn(ColumnWithTypeAndName column)
{
const auto & node = stack[0].actions_dag->addColumn(std::move(column));
stack[0].index->addNode(&node);
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addAlias(const std::string & name, std::string alias)
{
auto level = getColumnLevel(name);
const auto & node = stack[level].actions_dag->addAlias(name, std::move(alias));
const auto & source = stack[level].index->getNode(name);
const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias));
stack[level].index->addNode(&node);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name)
{
getColumnLevel(source_name);
if (!stack.front().actions_dag->getIndex().contains(source_name))
const auto * source_node = stack.front().index->tryGetNode(source_name);
if (!source_node)
throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name,
ErrorCodes::BAD_ARGUMENTS);
const auto & node = stack.front().actions_dag->addArrayJoin(source_name, std::move(result_name));
const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name));
stack.front().index->addNode(&node);
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addFunction(
@ -498,17 +561,26 @@ void ScopeStack::addFunction(
for (const auto & argument : argument_names)
level = std::max(level, getColumnLevel(argument));
const auto & node = stack[level].actions_dag->addFunction(function, argument_names, std::move(result_name), context);
ActionsDAG::NodeRawConstPtrs children;
children.reserve(argument_names.size());
for (const auto & argument : argument_names)
children.push_back(&stack[level].index->getNode(argument));
const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name));
stack[level].index->addNode(&node);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
ActionsDAGPtr ScopeStack::popLevel()
{
auto res = std::move(stack.back());
auto res = std::move(stack.back().actions_dag);
stack.pop_back();
return res.actions_dag;
return res;
}
std::string ScopeStack::dumpNames() const
@ -521,6 +593,11 @@ const ActionsDAG & ScopeStack::getLastActions() const
return *stack.back().actions_dag;
}
const ScopeStack::Index & ScopeStack::getLastActionsIndex() const
{
return *stack.back().index;
}
bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
/// Visit children themselves
@ -568,10 +645,9 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
child_column_name = as_literal->unique_column_name;
}
const auto & index = data.actions_stack.getLastActions().getIndex();
auto it = index.find(child_column_name);
if (it != index.end())
return NameAndTypePair(child_column_name, (*it)->result_type);
const auto & index = data.actions_stack.getLastActionsIndex();
if (const auto * node = index.tryGetNode(child_column_name))
return NameAndTypePair(child_column_name, node->result_type);
if (!data.only_consts)
throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(),
@ -927,7 +1003,9 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(lambda_dag);
auto lambda_actions = std::make_shared<ExpressionActions>(
lambda_dag,
ExpressionActionsSettings::fromContext(data.context));
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
@ -983,12 +1061,8 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
const auto & index = data.actions_stack.getLastActions().getIndex();
const ActionsDAG::Node * existing_column = nullptr;
auto it = index.find(default_name);
if (it != index.end())
existing_column = *it;
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
/*
* To approximate CSE, bind all identical literals to a single temporary
@ -1101,7 +1175,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
else
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = last_actions.getIndex();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName()))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets);

View File

@ -62,10 +62,18 @@ Block createBlockForSet(
*/
struct ScopeStack
{
class Index;
using IndexPtr = std::unique_ptr<Index>;
struct Level
{
ActionsDAGPtr actions_dag;
IndexPtr index;
NameSet inputs;
Level();
Level(Level &&);
~Level();
};
using Levels = std::vector<Level>;
@ -91,6 +99,7 @@ struct ScopeStack
ActionsDAGPtr popLevel();
const ActionsDAG & getLastActions() const;
const Index & getLastActionsIndex() const;
std::string dumpNames() const;
};
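The Level struct above holds a std::unique_ptr to the forward-declared ScopeStack::Index, which is why its constructor, move constructor and destructor are only declared here and defaulted in the .cpp, where Index is complete. A self-contained sketch of the same pattern, with all names hypothetical:

```cpp
// Illustration of the pattern used by ScopeStack::Level: a std::unique_ptr to a type
// that is only forward-declared in the header forces the special member functions
// to be defined where the type is complete.
#include <memory>
#include <utility>

// --- header part ---
class Widget;                       // forward declaration, like ScopeStack::Index

struct Holder
{
    std::unique_ptr<Widget> widget; // unique_ptr to an incomplete type is fine here

    Holder();                       // the compiler-generated special members would
    Holder(Holder &&);              // need ~Widget(), so they are only declared here
    ~Holder();
};

// --- source part (Widget is complete from here on) ---
class Widget
{
public:
    int value = 0;
};

Holder::Holder() = default;
Holder::Holder(Holder &&) = default;
Holder::~Holder() = default;

int main()
{
    Holder a;
    a.widget = std::make_unique<Widget>();
    Holder b = std::move(a);        // uses the out-of-line move constructor
    return b.widget ? 0 : 1;
}
```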

View File

@ -440,80 +440,27 @@ void NO_INLINE Aggregator::executeImpl(
typename Method::State state(key_columns, key_sizes, aggregation_state_cache);
if (!no_more_keys)
executeImplBatch(method, state, aggregates_pool, rows, aggregate_instructions);
executeImplBatch<false>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
else
executeImplCase<true>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
executeImplBatch<true>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
}
template <bool no_more_keys, typename Method>
void NO_INLINE Aggregator::executeImplCase(
void NO_INLINE Aggregator::executeImplBatch(
Method & method,
typename Method::State & state,
Arena * aggregates_pool,
size_t rows,
AggregateFunctionInstruction * aggregate_instructions,
AggregateDataPtr overflow_row) const
{
/// NOTE When editing this code, also pay attention to SpecializedAggregator.h.
/// For all rows.
for (size_t i = 0; i < rows; ++i)
{
AggregateDataPtr aggregate_data = nullptr;
if constexpr (!no_more_keys) /// Insert.
{
auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
if (emplace_result.isInserted())
{
/// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
emplace_result.setMapped(nullptr);
aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
createAggregateStates(aggregate_data);
emplace_result.setMapped(aggregate_data);
}
else
aggregate_data = emplace_result.getMapped();
}
else
{
/// Add only if the key already exists.
auto find_result = state.findKey(method.data, i, *aggregates_pool);
if (find_result.isFound())
aggregate_data = find_result.getMapped();
}
/// aggregate_data == nullptr means that the new key did not fit in the hash table because of no_more_keys.
/// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do.
if (!aggregate_data && !overflow_row)
continue;
AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row;
/// Add values to the aggregate functions.
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
(*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i, aggregates_pool);
}
}
template <typename Method>
void NO_INLINE Aggregator::executeImplBatch(
Method & method,
typename Method::State & state,
Arena * aggregates_pool,
size_t rows,
AggregateFunctionInstruction * aggregate_instructions) const
{
/// Optimization for special case when there are no aggregate functions.
if (params.aggregates_size == 0)
{
if constexpr (no_more_keys)
return;
/// For all rows.
AggregateDataPtr place = aggregates_pool->alloc(0);
for (size_t i = 0; i < rows; ++i)
@ -522,7 +469,7 @@ void NO_INLINE Aggregator::executeImplBatch(
}
/// Optimization for special case when aggregating by 8bit key.
if constexpr (std::is_same_v<Method, typename decltype(AggregatedDataVariants::key8)::element_type>)
if constexpr (!no_more_keys && std::is_same_v<Method, typename decltype(AggregatedDataVariants::key8)::element_type>)
{
/// We use another method if there are aggregate functions with -Array combinator.
bool has_arrays = false;
@ -565,6 +512,8 @@ void NO_INLINE Aggregator::executeImplBatch(
{
AggregateDataPtr aggregate_data = nullptr;
if constexpr (!no_more_keys)
{
auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
@ -581,8 +530,19 @@ void NO_INLINE Aggregator::executeImplBatch(
else
aggregate_data = emplace_result.getMapped();
assert(aggregate_data != nullptr);
}
else
{
/// Add only if the key already exists.
auto find_result = state.findKey(method.data, i, *aggregates_pool);
if (find_result.isFound())
aggregate_data = find_result.getMapped();
else
aggregate_data = overflow_row;
}
places[i] = aggregate_data;
assert(places[i] != nullptr);
}
/// Add values to the aggregate functions.

View File

@ -1111,7 +1111,7 @@ protected:
/// Specialization for a particular value no_more_keys.
template <bool no_more_keys, typename Method>
void executeImplCase(
void executeImplBatch(
Method & method,
typename Method::State & state,
Arena * aggregates_pool,
@ -1119,14 +1119,6 @@ protected:
AggregateFunctionInstruction * aggregate_instructions,
AggregateDataPtr overflow_row) const;
template <typename Method>
void executeImplBatch(
Method & method,
typename Method::State & state,
Arena * aggregates_pool,
size_t rows,
AggregateFunctionInstruction * aggregate_instructions) const;
/// For case when there are no keys (all aggregate into one row).
static void executeWithoutKeyImpl(
AggregatedDataWithoutKey & res,

View File

@ -188,7 +188,7 @@ void AsynchronousMetrics::update()
#if USE_EMBEDDED_COMPILER
{
if (auto compiled_expression_cache = global_context.getCompiledExpressionCache())
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
new_values["CompiledExpressionCacheCount"] = compiled_expression_cache->count();
}
#endif

View File

@ -15,6 +15,8 @@
#include <Processors/Sources/DelayedSource.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
namespace ProfileEvents
@ -284,7 +286,9 @@ void SelectStreamFactory::createForShard(
if (try_results.empty() || local_delay < max_remote_delay)
{
auto plan = createLocalPlan(modified_query_ast, header, context, stage);
return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()))));
return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context_ptr),
BuildQueryPipelineSettings::fromContext(*context_ptr))));
}
else
{

View File

@ -380,10 +380,6 @@ struct ContextShared
ConfigurationPtr clusters_config; /// Stores updated configs
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compiled_expression_cache;
#endif
bool shutdown_called = false;
Stopwatch uptime_watch;
@ -2327,35 +2323,6 @@ void Context::setQueryParameter(const String & name, const String & value)
}
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> Context::getCompiledExpressionCache() const
{
auto lock = getLock();
return shared->compiled_expression_cache;
}
void Context::setCompiledExpressionCache(size_t cache_size)
{
auto lock = getLock();
if (shared->compiled_expression_cache)
throw Exception("Compiled expressions cache has been already created.", ErrorCodes::LOGICAL_ERROR);
shared->compiled_expression_cache = std::make_shared<CompiledExpressionCache>(cache_size);
}
void Context::dropCompiledExpressionCache() const
{
auto lock = getLock();
if (shared->compiled_expression_cache)
shared->compiled_expression_cache->reset();
}
#endif
void Context::addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd) const
{
auto lock = getLock();

View File

@ -740,12 +740,6 @@ public:
void setQueryParameter(const String & name, const String & value);
void setQueryParameters(const NameToNameMap & parameters) { query_parameters = parameters; }
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> getCompiledExpressionCache() const;
void setCompiledExpressionCache(size_t cache_size);
void dropCompiledExpressionCache() const;
#endif
/// Add started bridge command. It will be killed after context destruction
void addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd) const;

View File

@ -910,31 +910,6 @@ String DatabaseCatalog::getPathForUUID(const UUID & uuid)
return toString(uuid).substr(0, uuid_prefix_len) + '/' + toString(uuid) + '/';
}
String DatabaseCatalog::resolveDictionaryName(const String & name) const
{
/// If it's dictionary from Atomic database, then we need to convert qualified name to UUID.
/// Try to split name and get id from associated StorageDictionary.
/// If something went wrong, return name as is.
/// TODO support dot in name for dictionaries in Atomic databases
auto pos = name.find('.');
if (pos == std::string::npos || name.find('.', pos + 1) != std::string::npos)
return name;
String maybe_database_name = name.substr(0, pos);
String maybe_table_name = name.substr(pos + 1);
auto db_and_table = tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, global_context);
if (!db_and_table.first)
return name;
assert(db_and_table.second);
if (db_and_table.first->getUUID() == UUIDHelpers::Nil)
return name;
if (db_and_table.second->getName() != "Dictionary")
return name;
return toString(db_and_table.second->getStorageID().uuid);
}
void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid)
{
if (uuid == UUIDHelpers::Nil)

View File

@ -192,9 +192,6 @@ public:
String getPathForDroppedMetadata(const StorageID & table_id) const;
void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false);
/// Try convert qualified dictionary name to persistent UUID
String resolveDictionaryName(const String & name) const;
void waitTableFinallyDropped(const UUID & uuid);
private:

View File

@ -44,16 +44,18 @@ namespace ErrorCodes
ExpressionActions::~ExpressionActions() = default;
ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_)
ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_)
: settings(settings_)
{
actions_dag = actions_dag_->clone();
actions_dag->compileExpressions();
#if USE_EMBEDDED_COMPILER
if (settings.compile_expressions)
actions_dag->compileExpressions(settings.min_count_to_compile_expression);
#endif
linearizeActions();
const auto & settings = actions_dag->getSettings();
if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns)
throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS,
"Too many temporary columns: {}. Maximum: {}",
@ -141,7 +143,7 @@ void ExpressionActions::linearizeActions()
ExpressionActions::Arguments arguments;
arguments.reserve(cur.node->children.size());
for (auto * child : cur.node->children)
for (const auto * child : cur.node->children)
{
auto & arg = data[reverse_index[child]];
@ -258,15 +260,14 @@ std::string ExpressionActions::Action::toString() const
void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) const
{
auto max_temporary_non_const_columns = actions_dag->getSettings().max_temporary_non_const_columns;
if (max_temporary_non_const_columns)
if (settings.max_temporary_non_const_columns)
{
size_t non_const_columns = 0;
for (const auto & column : columns)
if (column.column && !isColumnConst(*column.column))
++non_const_columns;
if (non_const_columns > max_temporary_non_const_columns)
if (non_const_columns > settings.max_temporary_non_const_columns)
{
WriteBufferFromOwnString list_of_non_const_columns;
for (const auto & column : columns)
@ -274,7 +275,7 @@ void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) cons
list_of_non_const_columns << "\n" << column.name;
throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str()
+ ". Maximum: " + std::to_string(max_temporary_non_const_columns),
+ ". Maximum: " + std::to_string(settings.max_temporary_non_const_columns),
ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS);
}
}
@ -460,7 +461,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run)
}
}
if (actions_dag->getSettings().project_input)
if (actions_dag->isInputProjected())
{
block.clear();
}
@ -554,7 +555,7 @@ std::string ExpressionActions::dumpActions() const
for (const auto & output_column : output_columns)
ss << output_column.name << " " << output_column.type->getName() << "\n";
ss << "\nproject input: " << actions_dag->getSettings().project_input << "\noutput positions:";
ss << "\nproject input: " << actions_dag->isInputProjected() << "\noutput positions:";
for (auto pos : result_positions)
ss << " " << pos;
ss << "\n";
@ -621,11 +622,10 @@ void ExpressionActionsChain::finalize()
/// Finalize all steps. Right to left to define unnecessary input columns.
for (int i = static_cast<int>(steps.size()) - 1; i >= 0; --i)
{
Names required_output = steps[i]->required_output;
std::unordered_map<String, size_t> required_output_indexes;
for (size_t j = 0; j < required_output.size(); ++j)
required_output_indexes[required_output[j]] = j;
auto & can_remove_required_output = steps[i]->can_remove_required_output;
auto & required_output = steps[i]->required_output;
NameSet required_names;
for (const auto & output : required_output)
required_names.insert(output.first);
if (i + 1 < static_cast<int>(steps.size()))
{
@ -634,15 +634,15 @@ void ExpressionActionsChain::finalize()
{
if (additional_input.count(it.name) == 0)
{
auto iter = required_output_indexes.find(it.name);
if (iter == required_output_indexes.end())
required_output.push_back(it.name);
else if (!can_remove_required_output.empty())
can_remove_required_output[iter->second] = false;
auto iter = required_output.find(it.name);
if (iter == required_output.end())
required_names.insert(it.name);
else
iter->second = false;
}
}
}
steps[i]->finalize(required_output);
steps[i]->finalize(required_names);
}
/// Add removal of unnecessary columns to the beginning of each step.
@ -666,8 +666,8 @@ std::string ExpressionActionsChain::dumpChain() const
{
ss << "step " << i << "\n";
ss << "required output:\n";
for (const std::string & name : steps[i]->required_output)
ss << name << "\n";
for (const auto & it : steps[i]->required_output)
ss << it.first << "\n";
ss << "\n" << steps[i]->dump() << "\n";
}
@ -693,20 +693,19 @@ ExpressionActionsChain::ArrayJoinStep::ArrayJoinStep(ArrayJoinActionPtr array_jo
}
}
void ExpressionActionsChain::ArrayJoinStep::finalize(const Names & required_output_)
void ExpressionActionsChain::ArrayJoinStep::finalize(const NameSet & required_output_)
{
NamesAndTypesList new_required_columns;
ColumnsWithTypeAndName new_result_columns;
NameSet names(required_output_.begin(), required_output_.end());
for (const auto & column : result_columns)
{
if (array_join->columns.count(column.name) != 0 || names.count(column.name) != 0)
if (array_join->columns.count(column.name) != 0 || required_output_.count(column.name) != 0)
new_result_columns.emplace_back(column);
}
for (const auto & column : required_columns)
{
if (array_join->columns.count(column.name) != 0 || names.count(column.name) != 0)
if (array_join->columns.count(column.name) != 0 || required_output_.count(column.name) != 0)
new_required_columns.emplace_back(column);
}
@ -729,14 +728,14 @@ ExpressionActionsChain::JoinStep::JoinStep(
analyzed_join->addJoinedColumnsAndCorrectTypes(result_columns);
}
void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_)
void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_)
{
/// We need to update required and result columns by removing unused ones.
NamesAndTypesList new_required_columns;
ColumnsWithTypeAndName new_result_columns;
/// These are the input columns we need.
NameSet required_names(required_output_.begin(), required_output_.end());
NameSet required_names = required_output_;
for (const auto & name : analyzed_join->keyNamesLeft())
required_names.emplace(name);

View File

@ -3,6 +3,7 @@
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActionsSettings.h>
#include <variant>
@ -38,7 +39,6 @@ class ExpressionActions
{
public:
using Node = ActionsDAG::Node;
using Index = ActionsDAG::Index;
struct Argument
{
@ -78,10 +78,12 @@ private:
ColumnNumbers result_positions;
Block sample_block;
ExpressionActionsSettings settings;
public:
ExpressionActions() = delete;
~ExpressionActions();
explicit ExpressionActions(ActionsDAGPtr actions_dag_);
explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {});
ExpressionActions(const ExpressionActions &) = default;
ExpressionActions & operator=(const ExpressionActions &) = default;
@ -89,6 +91,7 @@ public:
const std::list<Node> & getNodes() const { return actions_dag->getNodes(); }
const ActionsDAG & getActionsDAG() const { return *actions_dag; }
const ColumnNumbers & getResultPositions() const { return result_positions; }
const ExpressionActionsSettings & getSettings() const { return settings; }
/// Get a list of input columns.
Names getRequiredColumns() const;
@ -138,21 +141,26 @@ struct ExpressionActionsChain
struct Step
{
virtual ~Step() = default;
explicit Step(Names required_output_) : required_output(std::move(required_output_)) {}
explicit Step(Names required_output_)
{
for (const auto & name : required_output_)
required_output[name] = true;
}
/// Columns were added to the block before current step in addition to prev step output.
NameSet additional_input;
/// Columns which are required in the result of current step.
Names required_output;
/// True if column from required_output is needed only for current step and not used in next actions
/// Flag is true if column from required_output is needed only for current step and not used in next actions
/// (and can be removed from block). Example: filter column for where actions.
/// If not empty, has the same size with required_output; is filled in finalize().
std::vector<bool> can_remove_required_output;
std::unordered_map<std::string, bool> required_output;
void addRequiredOutput(const std::string & name) { required_output[name] = true; }
virtual NamesAndTypesList getRequiredColumns() const = 0;
virtual ColumnsWithTypeAndName getResultColumns() const = 0;
/// Remove unused result and update required columns
virtual void finalize(const Names & required_output_) = 0;
virtual void finalize(const NameSet & required_output_) = 0;
/// Add projections to expression
virtual void prependProjectInput() const = 0;
virtual std::string dump() const = 0;
@ -182,9 +190,9 @@ struct ExpressionActionsChain
return actions_dag->getResultColumns();
}
void finalize(const Names & required_output_) override
void finalize(const NameSet & required_output_) override
{
if (!actions_dag->getSettings().projected_output)
if (!actions_dag->isOutputProjected())
actions_dag->removeUnusedActions(required_output_);
}
@ -209,7 +217,7 @@ struct ExpressionActionsChain
NamesAndTypesList getRequiredColumns() const override { return required_columns; }
ColumnsWithTypeAndName getResultColumns() const override { return result_columns; }
void finalize(const Names & required_output_) override;
void finalize(const NameSet & required_output_) override;
void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ?
std::string dump() const override { return "ARRAY JOIN"; }
};
@ -225,7 +233,7 @@ struct ExpressionActionsChain
JoinStep(std::shared_ptr<TableJoin> analyzed_join_, JoinPtr join_, ColumnsWithTypeAndName required_columns_);
NamesAndTypesList getRequiredColumns() const override { return required_columns; }
ColumnsWithTypeAndName getResultColumns() const override { return result_columns; }
void finalize(const Names & required_output_) override;
void finalize(const NameSet & required_output_) override;
void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ?
std::string dump() const override { return "JOIN"; }
};
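The Step above now keeps required_output as a name -> bool map, where the flag means the column is needed only by this step and may be dropped afterwards (e.g. the filter column of a WHERE clause); finalize() in the .cpp clears the flag when a later step still reads that column. A standalone toy model of that bookkeeping (not ClickHouse code, hypothetical column names):

```cpp
// Toy model of the required_output bookkeeping: addRequiredOutput marks a column
// as removable by default, and the flag is cleared when a later step needs it.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
    std::unordered_map<std::string, bool> required_output; // name -> can be removed after this step

    auto add_required_output = [&](const std::string & name) { required_output[name] = true; };

    add_required_output("filter_column");   // e.g. the WHERE condition result
    add_required_output("x");

    std::vector<std::string> needed_by_next_steps = {"x"};
    for (const auto & name : needed_by_next_steps)
    {
        auto it = required_output.find(name);
        if (it != required_output.end())
            it->second = false;              // still used later, must not be dropped
    }

    for (const auto & [name, removable] : required_output)
        std::cout << name << (removable ? " can be removed\n" : " must be kept\n");
}
```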

View File

@ -0,0 +1,24 @@
#include <Interpreters/ExpressionActionsSettings.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
namespace DB
{
ExpressionActionsSettings ExpressionActionsSettings::fromSettings(const Settings & from)
{
ExpressionActionsSettings settings;
settings.compile_expressions = from.compile_expressions;
settings.min_count_to_compile_expression = from.min_count_to_compile_expression;
settings.max_temporary_columns = from.max_temporary_columns;
settings.max_temporary_non_const_columns = from.max_temporary_non_const_columns;
return settings;
}
ExpressionActionsSettings ExpressionActionsSettings::fromContext(const Context & from)
{
return fromSettings(from.getSettingsRef());
}
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <cstddef>
namespace DB
{
struct Settings;
class Context;
struct ExpressionActionsSettings
{
bool compile_expressions = false;
size_t min_count_to_compile_expression = 0;
size_t max_temporary_columns = 0;
size_t max_temporary_non_const_columns = 0;
static ExpressionActionsSettings fromSettings(const Settings & from);
static ExpressionActionsSettings fromContext(const Context & from);
};
}
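Elsewhere in this commit, expression actions are built as std::make_shared<ExpressionActions>(dag, ExpressionActionsSettings::fromContext(context)). A minimal sketch of such a call site follows; the wrapper function is hypothetical and the snippet assumes the ClickHouse headers from this commit:

```cpp
// Hypothetical call site showing how the new settings struct is passed into
// ExpressionActions instead of being read from the DAG as before.
#include <memory>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionActionsSettings.h>

namespace DB
{

std::shared_ptr<ExpressionActions> buildActions(const ActionsDAGPtr & dag, const Context & context)
{
    /// compile_expressions, min_count_to_compile_expression, max_temporary_columns and
    /// max_temporary_non_const_columns are copied out of the query context here.
    return std::make_shared<ExpressionActions>(dag, ExpressionActionsSettings::fromContext(context));
}

}
```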

View File

@ -235,14 +235,10 @@ void ExpressionAnalyzer::analyzeAggregation()
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
const auto & index = temp_actions->getIndex();
auto it = index.find(column_name);
if (it == index.end())
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
const auto & node = *it;
/// Constant expressions have non-null column pointer at this stage.
if (node->column && isColumnConst(*node->column))
{
@ -392,7 +388,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->getIndex().contains(left_in_operand->getColumnName()))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
makeExplicitSet(func, *temp_actions, true, context,
settings.size_limits_for_set, prepared_sets);
}
@ -438,7 +434,8 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (const ASTFunction * node : aggregates())
{
AggregateDescription aggregate;
if (node->arguments) getRootActionsNoMakeSet(node->arguments, true, actions);
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName();
@ -446,20 +443,18 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
const auto & index = actions->getIndex();
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
auto it = index.find(name);
if (it == index.end())
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in aggregate function '{}'",
name, node->formatForErrorMessage());
}
types[i] = (*it)->result_type;
types[i] = dag_node->result_type;
aggregate.argument_names[i] = name;
}
@ -595,20 +590,19 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
= window_function.function_node->arguments->children;
window_function.argument_types.resize(arguments.size());
window_function.argument_names.resize(arguments.size());
const auto & index = actions->getIndex();
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const auto * node = actions->tryFindInIndex(name);
auto it = index.find(name);
if (it == index.end())
if (!node)
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in window function '{}'",
name, window_function.function_node->formatForErrorMessage());
}
window_function.argument_types[i] = (*it)->result_type;
window_function.argument_types[i] = node->result_type;
window_function.argument_names[i] = name;
}
@ -682,7 +676,10 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr
{
/// Assign new names to columns, if needed.
if (result_source.first != result_source.second)
actions->addAlias(result_source.second, result_source.first);
{
const auto & node = actions->findInIndex(result_source.second);
actions->getIndex().push_back(&actions->addAlias(node, result_source.first));
}
/// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
result_columns.insert(result_source.first);
@ -761,8 +758,8 @@ static bool allowDictJoin(StoragePtr joined_storage, const Context & context, St
if (!dict)
return false;
dict_name = dict->resolvedDictionaryName();
auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name);
dict_name = dict->dictionaryName();
auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name, context);
if (!dictionary)
return false;
@ -842,7 +839,9 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(
const ColumnsWithTypeAndName & right_sample_columns = subquery_for_join.sample_block.getColumnsWithTypeAndName();
bool need_convert = syntax->analyzed_join->applyJoinKeyConvert(left_sample_columns, right_sample_columns);
if (need_convert)
subquery_for_join.addJoinActions(std::make_shared<ExpressionActions>(syntax->analyzed_join->rightConvertingActions()));
subquery_for_join.addJoinActions(std::make_shared<ExpressionActions>(
syntax->analyzed_join->rightConvertingActions(),
ExpressionActionsSettings::fromContext(context)));
subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, context);
@ -881,15 +880,10 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName();
step.required_output.push_back(prewhere_column_name);
step.can_remove_required_output.push_back(true);
step.addRequiredOutput(prewhere_column_name);
const auto & index = step.actions()->getIndex();
auto it = index.find(prewhere_column_name);
if (it == index.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", prewhere_column_name);
auto filter_type = (*it)->result_type;
const auto & node = step.actions()->findInIndex(prewhere_column_name);
auto filter_type = node.result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@ -898,8 +892,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
/// Remove unused source_columns from prewhere actions.
auto tmp_actions_dag = std::make_shared<ActionsDAG>(sourceColumns());
getRootActions(select_query->prewhere(), only_types, tmp_actions_dag);
tmp_actions_dag->removeUnusedActions({prewhere_column_name});
auto tmp_actions = std::make_shared<ExpressionActions>(tmp_actions_dag);
tmp_actions_dag->removeUnusedActions(NameSet{prewhere_column_name});
auto tmp_actions = std::make_shared<ExpressionActions>(tmp_actions_dag, ExpressionActionsSettings::fromContext(context));
auto required_columns = tmp_actions->getRequiredColumns();
NameSet required_source_columns(required_columns.begin(), required_columns.end());
required_source_columns.insert(first_action_names.begin(), first_action_names.end());
@ -909,10 +903,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
for (const auto & column : additional_required_columns)
{
if (required_source_columns.count(column))
{
step.required_output.push_back(column);
step.can_remove_required_output.push_back(true);
}
step.addRequiredOutput(column);
}
auto names = step.actions()->getNames();
@ -969,8 +960,7 @@ void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsCha
// FIXME: assert(filter_info);
auto * expression_step = typeid_cast<ExpressionActionsChain::ExpressionActionsStep *>(&step);
expression_step->actions_dag = std::move(actions_dag);
step.required_output.push_back(std::move(column_name));
step.can_remove_required_output = {true};
step.addRequiredOutput(column_name);
chain.addStep();
}
@ -987,15 +977,10 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName();
step.required_output.push_back(where_column_name);
step.can_remove_required_output = {true};
step.addRequiredOutput(where_column_name);
const auto & index = step.actions()->getIndex();
auto it = index.find(where_column_name);
if (it == index.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", where_column_name);
auto filter_type = (*it)->result_type;
const auto & node = step.actions()->findInIndex(where_column_name);
auto filter_type = node.result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@ -1016,7 +1001,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.required_output.emplace_back(ast->getColumnName());
step.addRequiredOutput(ast->getColumnName());
getRootActions(ast, only_types, step.actions());
}
@ -1026,7 +1011,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
{
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag));
group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context)));
}
}
@ -1041,7 +1026,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & desc : aggregate_descriptions)
for (const auto & name : desc.argument_names)
step.required_output.emplace_back(name);
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
@ -1098,14 +1083,14 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.required_output.push_back(a->getColumnName());
step.addRequiredOutput(a->getColumnName());
}
}
// (2a) Required PARTITION BY and ORDER BY columns.
for (const auto & c : w.full_sort_description)
{
step.required_output.push_back(c.column_name);
step.addRequiredOutput(c.column_name);
}
}
}
@ -1120,7 +1105,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.required_output.push_back(select_query->having()->getColumnName());
step.addRequiredOutput(select_query->having()->getColumnName());
return true;
}
@ -1144,7 +1129,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.required_output.push_back(child->getColumnName());
step.addRequiredOutput(child->getColumnName());
}
}
@ -1172,7 +1157,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.required_output.push_back(order_expression->getColumnName());
step.addRequiredOutput(order_expression->getColumnName());
if (ast->with_fill)
with_fill = true;
@ -1184,7 +1169,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
{
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag));
order_by_elements_actions.emplace_back(
std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context)));
}
}
@ -1215,7 +1201,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
NameSet aggregated_names;
for (const auto & column : aggregated_columns)
{
step.required_output.push_back(column.name);
step.addRequiredOutput(column.name);
aggregated_names.insert(column.name);
}
@ -1223,7 +1209,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
{
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.required_output.push_back(std::move(child_name));
step.addRequiredOutput(std::move(child_name));
}
return true;
@ -1271,7 +1257,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
}
result_columns.emplace_back(source_name, result_name);
step.required_output.push_back(result_columns.back().second);
step.addRequiredOutput(result_columns.back().second);
}
}
@ -1285,7 +1271,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.required_output.push_back(expr->getColumnName());
step.addRequiredOutput(expr->getColumnName());
}
@ -1325,18 +1311,26 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
if (!(add_aliases && project_result))
{
NameSet name_set(result_names.begin(), result_names.end());
/// We will not delete the original columns.
for (const auto & column_name_type : sourceColumns())
{
if (name_set.count(column_name_type.name) == 0)
{
result_names.push_back(column_name_type.name);
name_set.insert(column_name_type.name);
}
}
actions_dag->removeUnusedActions(name_set);
}
actions_dag->removeUnusedActions(result_names);
return actions_dag;
}
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
return std::make_shared<ExpressionActions>(getActionsDAG(add_aliases, project_result));
return std::make_shared<ExpressionActions>(getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(context));
}
@ -1345,7 +1339,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
auto actions = std::make_shared<ActionsDAG>(NamesAndTypesList());
getRootActions(query, true, actions, true);
return std::make_shared<ExpressionActions>(actions);
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(context));
}
ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
@ -1390,7 +1384,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (!finalized)
{
finalize(chain, where_step_num);
finalize(chain, where_step_num, query);
finalized = true;
}
@ -1436,7 +1430,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
Block before_prewhere_sample = source_header;
if (sanitizeBlock(before_prewhere_sample))
{
ExpressionActions(prewhere_info->prewhere_actions).execute(before_prewhere_sample);
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context.getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
@ -1469,7 +1465,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
before_where_sample = source_header;
if (sanitizeBlock(before_where_sample))
{
ExpressionActions(before_where).execute(before_where_sample);
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context.getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
@ -1511,6 +1509,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
settings.optimize_read_in_order
&& storage && query.orderBy()
&& !query_analyzer.hasAggregation()
&& !query_analyzer.hasWindow()
&& !query.final()
&& join_allow_read_in_order;
@ -1559,11 +1558,14 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.required_output.push_back(child->getColumnName());
step.addRequiredOutput(child->getColumnName());
}
}
selected_columns = chain.getLastStep().required_output;
selected_columns.clear();
selected_columns.reserve(chain.getLastStep().required_output.size());
for (const auto & it : chain.getLastStep().required_output)
selected_columns.emplace_back(it.first);
has_order_by = query.orderBy() != nullptr;
before_order_by = query_analyzer.appendOrderBy(
@ -1589,21 +1591,22 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
checkActions();
}
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num)
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query)
{
size_t next_step_i = 0;
if (hasPrewhere())
{
const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++);
prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);
prewhere_info->prewhere_actions->projectInput(false);
NameSet columns_to_remove;
for (size_t i = 1; i < step.required_output.size(); ++i)
for (const auto & [name, can_remove] : step.required_output)
{
if (step.can_remove_required_output[i])
columns_to_remove.insert(step.required_output[i]);
if (name == prewhere_info->prewhere_column_name)
prewhere_info->remove_prewhere_column = can_remove;
else if (can_remove)
columns_to_remove.insert(name);
}
columns_to_remove_after_prewhere = std::move(columns_to_remove);
@ -1611,8 +1614,8 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
const ExpressionActionsChain::Step & step = *chain.steps.at(where_step_num);
remove_where_filter = step.can_remove_required_output.at(0);
auto where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}
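Note on the hunks above: the parallel required_output / can_remove_required_output vectors are folded into a single addRequiredOutput() call, and finalize() now iterates name/flag pairs. A minimal sketch of the presumed shape of ExpressionActionsChain::Step after the change, assuming required_output became a name-to-"can remove" map (hypothetical, not the exact ClickHouse definition):

#include <string>
#include <unordered_map>

struct Step
{
    /// Column name -> whether the column may be dropped after this step.
    std::unordered_map<std::string, bool> required_output;

    void addRequiredOutput(const std::string & name, bool can_remove = true)
    {
        /// Keep the column if any caller needs it to survive the step.
        auto [it, inserted] = required_output.emplace(name, can_remove);
        if (!inserted)
            it->second = it->second && can_remove;
    }
};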
View File
@ -247,7 +247,7 @@ struct ExpressionAnalysisResult
void removeExtraColumns() const;
void checkActions() const;
void finalize(const ExpressionActionsChain & chain, size_t where_step_num);
void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
};
/// SelectQuery specific ExpressionAnalyzer part.
View File
@ -596,8 +596,8 @@ static bool isCompilableFunction(const ActionsDAG::Node & node)
}
static LLVMFunction::CompileDAG getCompilableDAG(
ActionsDAG::Node * root,
std::vector<ActionsDAG::Node *> & children,
const ActionsDAG::Node * root,
ActionsDAG::NodeRawConstPtrs & children,
const std::unordered_set<const ActionsDAG::Node *> & used_in_result)
{
LLVMFunction::CompileDAG dag;
@ -605,7 +605,7 @@ static LLVMFunction::CompileDAG getCompilableDAG(
std::unordered_map<const ActionsDAG::Node *, size_t> positions;
struct Frame
{
ActionsDAG::Node * node;
const ActionsDAG::Node * node;
size_t next_child_to_visit = 0;
};
@ -621,7 +621,7 @@ static LLVMFunction::CompileDAG getCompilableDAG(
while (is_compilable_function && frame.next_child_to_visit < frame.node->children.size())
{
auto * child = frame.node->children[frame.next_child_to_visit];
const auto * child = frame.node->children[frame.next_child_to_visit];
if (positions.count(child))
++frame.next_child_to_visit;
@ -743,8 +743,7 @@ UInt128 LLVMFunction::CompileDAG::hash() const
static FunctionBasePtr compile(
const LLVMFunction::CompileDAG & dag,
size_t min_count_to_compile_expression,
const std::shared_ptr<CompiledExpressionCache> & compilation_cache)
size_t min_count_to_compile_expression)
{
static std::unordered_map<UInt128, UInt32, UInt128Hash> counter;
static std::mutex mutex;
@ -769,7 +768,7 @@ static FunctionBasePtr compile(
}
FunctionBasePtr fn;
if (compilation_cache)
if (auto * compilation_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&dag] ()
{
@ -790,7 +789,7 @@ static FunctionBasePtr compile(
return fn;
}
void ActionsDAG::compileFunctions()
void ActionsDAG::compileFunctions(size_t min_count_to_compile_expression)
{
struct Data
{
@ -815,7 +814,7 @@ void ActionsDAG::compileFunctions()
struct Frame
{
Node * node;
const Node * node;
size_t next_child_to_visit = 0;
};
@ -834,7 +833,7 @@ void ActionsDAG::compileFunctions()
while (frame.next_child_to_visit < frame.node->children.size())
{
auto * child = frame.node->children[frame.next_child_to_visit];
const auto * child = frame.node->children[frame.next_child_to_visit];
if (visited.count(child))
++frame.next_child_to_visit;
@ -871,10 +870,10 @@ void ActionsDAG::compileFunctions()
if (should_compile)
{
std::vector<Node *> new_children;
NodeRawConstPtrs new_children;
auto dag = getCompilableDAG(frame.node, new_children, used_in_result);
if (auto fn = compile(dag, settings.min_count_to_compile_expression, compilation_cache))
if (auto fn = compile(dag, min_count_to_compile_expression))
{
/// Replace the current node with the compiled function.
@ -883,12 +882,13 @@ void ActionsDAG::compileFunctions()
for (const auto * child : new_children)
arguments.emplace_back(child->column, child->result_type, child->result_name);
frame.node->type = ActionsDAG::ActionType::FUNCTION;
frame.node->function_base = fn;
frame.node->function = fn->prepare(arguments);
frame.node->children.swap(new_children);
frame.node->is_function_compiled = true;
frame.node->column = nullptr; /// Just in case.
auto * frame_node = const_cast<Node *>(frame.node);
frame_node->type = ActionsDAG::ActionType::FUNCTION;
frame_node->function_base = fn;
frame_node->function = fn->prepare(arguments);
frame_node->children.swap(new_children);
frame_node->is_function_compiled = true;
frame_node->column = nullptr; /// Just in case.
}
}
}
@ -900,6 +900,25 @@ void ActionsDAG::compileFunctions()
}
}
CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
{
static CompiledExpressionCacheFactory factory;
return factory;
}
void CompiledExpressionCacheFactory::init(size_t cache_size)
{
if (cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");
cache = std::make_unique<CompiledExpressionCache>(cache_size);
}
CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()
{
return cache.get();
}
}
#endif
View File
@ -100,6 +100,18 @@ public:
using Base::Base;
};
class CompiledExpressionCacheFactory
{
private:
std::unique_ptr<CompiledExpressionCache> cache;
public:
static CompiledExpressionCacheFactory & instance();
void init(size_t cache_size);
CompiledExpressionCache * tryGetCache();
};
}
#endif
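The factory above replaces threading a cache pointer through compileFunctions(). A hedged usage sketch based only on the calls visible in this diff (the cache size is an arbitrary placeholder): the server initializes the singleton once at startup, and callers probe it with tryGetCache(), which stays nullptr if JIT caching was never configured.

/// Sketch; 128 is an arbitrary placeholder for the configured cache size.
CompiledExpressionCacheFactory::instance().init(/* cache_size = */ 128);

if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
    cache->reset();  /// what SYSTEM DROP COMPILED EXPRESSION CACHE now does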
View File
@ -1,6 +1,10 @@
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Databases/IDatabase.h>
#include <Storages/IStorage.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
@ -13,10 +17,15 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Must not acquire Context lock in constructor to avoid possibility of deadlocks.
ExternalDictionariesLoader::ExternalDictionariesLoader(Context & context_)
ExternalDictionariesLoader::ExternalDictionariesLoader(Context & global_context_)
: ExternalLoader("external dictionary", &Poco::Logger::get("ExternalDictionariesLoader"))
, context(context_)
, global_context(global_context_)
{
setConfigSettings({"dictionary", "name", "database", "uuid"});
enableAsyncLoading(true);
@ -31,9 +40,88 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create(
/// For dictionaries from databases (created with DDL queries) we have to perform
/// additional checks, so we identify them here.
bool dictionary_from_database = !repository_name.empty();
return DictionaryFactory::instance().create(name, config, key_in_config, context, dictionary_from_database);
return DictionaryFactory::instance().create(name, config, key_in_config, global_context, dictionary_from_database);
}
ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
return std::static_pointer_cast<const IDictionaryBase>(load(resolved_dictionary_name));
}
ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
return std::static_pointer_cast<const IDictionaryBase>(tryLoad(resolved_dictionary_name));
}
void ExternalDictionariesLoader::reloadDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
loadOrReload(resolved_dictionary_name);
}
DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std::string & dictionary_name, const Context & query_context) const
{
std::string resolved_name = resolveDictionaryName(dictionary_name, query_context.getCurrentDatabase());
auto load_result = getLoadResult(resolved_name);
if (!load_result.config)
throw Exception("Dictionary " + backQuote(dictionary_name) + " config not found", ErrorCodes::BAD_ARGUMENTS);
return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
}
std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const
{
std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name);
bool has_dictionary = has(resolved_name);
if (!has_dictionary)
{
/// If the dictionary was not found and no database was specified explicitly,
/// we can qualify the dictionary name with the current database name.
/// It will help if dictionary is created with DDL and is in current database.
if (dictionary_name.find('.') == std::string::npos)
{
String dictionary_name_with_database = current_database_name + '.' + dictionary_name;
resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name_with_database);
has_dictionary = has(resolved_name);
}
}
if (!has_dictionary)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary ({}) not found", backQuote(dictionary_name));
return resolved_name;
}
std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const
{
/// If it's a dictionary from an Atomic database, we need to convert the qualified name to a UUID.
/// Try to split name and get id from associated StorageDictionary.
/// If something went wrong, return name as is.
auto pos = name.find('.');
if (pos == std::string::npos || name.find('.', pos + 1) != std::string::npos)
return name;
std::string maybe_database_name = name.substr(0, pos);
std::string maybe_table_name = name.substr(pos + 1);
auto [db, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, global_context);
if (!db)
return name;
assert(table);
if (db->getUUID() == UUIDHelpers::Nil)
return name;
if (table->getName() != "Dictionary")
return name;
return toString(table->getStorageID().uuid);
}
DictionaryStructure
ExternalDictionariesLoader::getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config)
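Resolution above is two-step: an unqualified name is tried as-is, then qualified with the query's current database; for dictionaries created with DDL in an Atomic database the qualified name is further rewritten to the StorageDictionary's UUID. A sketch of the caller side, with a made-up dictionary name and current database:

/// Illustration only; "hits" and current database "db1" are hypothetical.
/// getDictionary tries "hits" first, then "db1.hits"; if db1 is an Atomic
/// database, the resolved name becomes the dictionary table's UUID string.
const auto & loader = context.getExternalDictionariesLoader();
auto dictionary = loader.getDictionary("hits", context);
auto structure  = loader.getDictionaryStructure("hits", context);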
View File
@ -1,9 +1,11 @@
#pragma once
#include <Dictionaries/IDictionary.h>
#include <Interpreters/ExternalLoader.h>
#include <memory>
#include <Common/quoteString.h>
#include <Interpreters/ExternalLoader.h>
#include <Dictionaries/IDictionary.h>
namespace DB
{
class Context;
@ -16,19 +18,18 @@ public:
using DictPtr = std::shared_ptr<const IDictionaryBase>;
/// Dictionaries will be loaded immediately and then updated in a separate thread every 'reload_period' seconds.
ExternalDictionariesLoader(Context & context_);
explicit ExternalDictionariesLoader(Context & global_context_);
DictPtr getDictionary(const std::string & name) const
{
return std::static_pointer_cast<const IDictionaryBase>(load(name));
}
DictPtr getDictionary(const std::string & dictionary_name, const Context & context) const;
DictPtr tryGetDictionary(const std::string & name) const
{
return std::static_pointer_cast<const IDictionaryBase>(tryLoad(name));
}
DictPtr tryGetDictionary(const std::string & dictionary_name, const Context & context) const;
void reloadDictionary(const std::string & dictionary_name, const Context & context) const;
DictionaryStructure getDictionaryStructure(const std::string & dictionary_name, const Context & context) const;
static DictionaryStructure getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config = "dictionary");
static DictionaryStructure getDictionaryStructure(const ObjectConfig & config);
static void resetAll();
@ -37,11 +38,16 @@ protected:
LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config,
const std::string & key_in_config, const std::string & repository_name) const override;
std::string resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const;
/// Try to convert a qualified dictionary name to a persistent UUID
std::string resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const;
friend class StorageSystemDictionaries;
friend class DatabaseDictionary;
private:
Context & context;
Context & global_context;
};
}
View File
@ -625,6 +625,12 @@ public:
return collectLoadResults<ReturnType>(filter);
}
bool has(const String & name) const
{
std::lock_guard lock{mutex};
return infos.contains(name);
}
/// Starts reloading all the objects whose update time is earlier than now.
/// The function doesn't touch objects that nobody has tried to load yet.
void reloadOutdated()
@ -1391,6 +1397,11 @@ ReturnType ExternalLoader::reloadAllTriedToLoad() const
return loadOrReload<ReturnType>([&names](const String & name) { return names.count(name); });
}
bool ExternalLoader::has(const String & name) const
{
return loading_dispatcher->has(name);
}
Strings ExternalLoader::getAllTriedToLoadNames() const
{
return loading_dispatcher->getAllTriedToLoadNames();
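The new has() is what resolveDictionaryName() relies on: a mutex-protected existence check against the loaded configuration that does not trigger a load. Sketch of the lookup-then-qualify pattern it enables (names are hypothetical):

/// Hypothetical names; mirrors the fallback logic in resolveDictionaryName().
std::string resolved = "hits";
if (!loader.has(resolved))
    resolved = "db1.hits";  /// qualify with the current database and retry
if (!loader.has(resolved))
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary ({}) not found", backQuote("hits"));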
View File
@ -196,6 +196,9 @@ public:
template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
ReturnType reloadAllTriedToLoad() const;
/// Check whether an object with this name exists in the configuration
bool has(const String & name) const;
/// Reloads all config repositories.
void reloadConfig() const;
View File
@ -14,6 +14,8 @@
#include <Storages/StorageView.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/printPipeline.h>
namespace DB
@ -251,7 +253,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
interpreter.buildQueryPlan(plan);
if (settings.optimize)
plan.optimize(QueryPlanOptimizationSettings(context.getSettingsRef()));
plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
plan.explainPlan(buf, settings.query_plan_options);
}
@ -265,7 +267,9 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions());
interpreter.buildQueryPlan(plan);
auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()));
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
if (settings.graph)
{
View File
@ -250,7 +250,7 @@ BlockIO InterpreterInsertQuery::execute()
}
}
res.pipeline = QueryPipeline::unitePipelines(std::move(pipelines), {});
res.pipeline = QueryPipeline::unitePipelines(std::move(pipelines), {}, ExpressionActionsSettings::fromContext(context));
}
}
@ -378,7 +378,7 @@ BlockIO InterpreterInsertQuery::execute()
res.pipeline.getHeader().getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context));
res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
View File
@ -62,6 +62,7 @@
#include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/WindowStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/AggregatingTransform.h>
@ -142,12 +143,11 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName();
actions->removeUnusedActions({column_name});
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
ActionsDAG::Index index;
for (const auto * node : actions->getInputs())
actions->addNodeToIndex(node);
actions->getIndex().push_back(node);
return column_name;
}
@ -561,7 +561,9 @@ BlockIO InterpreterSelectQuery::execute()
buildQueryPlan(query_plan);
res.pipeline = std::move(*query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef())));
res.pipeline = std::move(*query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context),
BuildQueryPipelineSettings::fromContext(*context)));
return res;
}
@ -606,7 +608,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
if (analysis_result.prewhere_info)
{
ExpressionActions(analysis_result.prewhere_info->prewhere_actions).execute(header);
ExpressionActions(
analysis_result.prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromContext(*context)).execute(header);
if (analysis_result.prewhere_info->remove_prewhere_column)
header.erase(analysis_result.prewhere_info->prewhere_column_name);
}
@ -1668,19 +1672,19 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
query_info.syntax_analyzer_result = syntax_analyzer_result;
query_info.sets = query_analyzer->getPreparedSets();
auto actions_settings = ExpressionActionsSettings::fromContext(*context);
if (prewhere_info)
{
query_info.prewhere_info = std::make_shared<PrewhereInfo>();
query_info.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions);
query_info.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings);
if (prewhere_info->row_level_filter_actions)
query_info.prewhere_info->row_level_filter = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions);
query_info.prewhere_info->row_level_filter = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions, actions_settings);
if (prewhere_info->alias_actions)
query_info.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(prewhere_info->alias_actions);
query_info.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(prewhere_info->alias_actions, actions_settings);
if (prewhere_info->remove_columns_actions)
query_info.prewhere_info->remove_columns_actions = std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions);
query_info.prewhere_info->remove_columns_actions = std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions, actions_settings);
query_info.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
query_info.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;
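The same construction pattern repeats across this commit: ExpressionActions now takes an explicit ExpressionActionsSettings, and QueryPlan::buildQueryPipeline takes both optimization and pipeline-build settings derived from the Context. A condensed sketch of the pattern (dag, plan and context are assumed to be in scope):

/// Pattern repeated throughout this diff; variable names are placeholders.
auto actions_settings = ExpressionActionsSettings::fromContext(context);
auto actions = std::make_shared<ExpressionActions>(dag, actions_settings);

auto pipeline = plan.buildQueryPipeline(
    QueryPlanOptimizationSettings::fromContext(context),
    BuildQueryPipelineSettings::fromContext(context));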
View File
@ -11,6 +11,7 @@
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Common/typeid_cast.h>
#include <Interpreters/InDepthNodeVisitor.h>
@ -296,7 +297,9 @@ BlockIO InterpreterSelectWithUnionQuery::execute()
QueryPlan query_plan;
buildQueryPlan(query_plan);
auto pipeline = query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef()));
auto pipeline = query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context),
BuildQueryPipelineSettings::fromContext(*context));
res.pipeline = std::move(*pipeline);
res.pipeline.addInterpreterContext(context);
View File
@ -24,6 +24,7 @@
#include <Interpreters/MetricLog.h>
#include <Interpreters/AsynchronousMetricLog.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/ExpressionJIT.h>
#include <Access/ContextAccess.h>
#include <Access/AllowedClientHosts.h>
#include <Databases/IDatabase.h>
@ -270,14 +271,17 @@ BlockIO InterpreterSystemQuery::execute()
#if USE_EMBEDDED_COMPILER
case Type::DROP_COMPILED_EXPRESSION_CACHE:
context.checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
system_context.dropCompiledExpressionCache();
if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
cache->reset();
break;
#endif
case Type::RELOAD_DICTIONARY:
{
context.checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY);
system_context.getExternalDictionariesLoader().loadOrReload(
DatabaseCatalog::instance().resolveDictionaryName(query.target_dictionary));
auto & external_dictionaries_loader = system_context.getExternalDictionariesLoader();
external_dictionaries_loader.reloadDictionary(query.target_dictionary, context);
ExternalDictionariesLoader::resetAll();
break;
}
View File
@ -673,16 +673,24 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
for (const auto & kv : stage.column_to_updated)
stage.analyzer->appendExpression(actions_chain, kv.second, dry_run);
auto & actions = actions_chain.getLastStep().actions();
for (const auto & kv : stage.column_to_updated)
{
actions_chain.getLastStep().actions()->addAlias(
kv.second->getColumnName(), kv.first, /* can_replace = */ true);
auto column_name = kv.second->getColumnName();
const auto & dag_node = actions->findInIndex(column_name);
const auto & alias = actions->addAlias(dag_node, kv.first);
actions->addOrReplaceInIndex(alias);
}
}
/// Remove all intermediate columns.
actions_chain.addStep();
actions_chain.getLastStep().required_output.assign(stage.output_columns.begin(), stage.output_columns.end());
actions_chain.getLastStep().required_output.clear();
ActionsDAG::NodeRawConstPtrs new_index;
for (const auto & name : stage.output_columns)
actions_chain.getLastStep().addRequiredOutput(name);
actions_chain.getLastActions();
actions_chain.finalize();
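The update handling above now goes through the DAG index explicitly: look up the node for the computed expression, create an alias node named after the target column, and put that alias into the index, replacing any node of the same name. Restated with made-up names:

/// Hypothetical names: alias the computed expression "plus(c, 1)" to the
/// updated column "c" inside an ActionsDAGPtr called actions.
const auto & expr_node  = actions->findInIndex("plus(c, 1)");
const auto & alias_node = actions->addAlias(expr_node, "c");
actions->addOrReplaceInIndex(alias_node);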
@ -755,7 +763,10 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto
}
}
auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()));
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
pipeline->addSimpleTransform([&](const Block & header)
{
return std::make_shared<MaterializingTransform>(header);
View File
@ -89,7 +89,7 @@ struct StorageID
const String & config_prefix);
/// If dictionary has UUID, then use it as dictionary name in ExternalLoader to allow dictionary renaming.
/// DatabaseCatalog::resolveDictionaryName(...) should be used to access such dictionaries by name.
/// ExternalDictionariesLoader::resolveDictionaryName(...) should be used to access such dictionaries by name.
String getInternalDictionaryName() const;
private:
View File
@ -51,7 +51,7 @@ void SubqueryForSet::addJoinActions(ExpressionActionsPtr actions)
auto new_dag = ActionsDAG::merge(
std::move(*joined_block_actions->getActionsDAG().clone()),
std::move(*actions->getActionsDAG().clone()));
joined_block_actions = std::make_shared<ExpressionActions>(new_dag);
joined_block_actions = std::make_shared<ExpressionActions>(new_dag, actions->getSettings());
}
}
Some files were not shown because too many files have changed in this diff