Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into nanodbc

This commit is contained in:
kssenii 2021-03-23 05:52:21 +00:00
commit 167c9d3aeb
208 changed files with 2452 additions and 1505 deletions

View File

@ -8,7 +8,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlighting and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help you get your questions answered.

contrib/libpq vendored

@ -1 +1 @@
Subproject commit 1f9c286dba60809edb64e384d6727d80d269b6cf
Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde

View File

@ -1,4 +1,5 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions
etc/security/limits.d/clickhouse.conf

View File

@ -760,7 +760,7 @@ create view test_times_view as
total_client_time,
queries,
query_max,
real / queries avg_real_per_query,
real / if(queries > 0, queries, 1) avg_real_per_query,
query_min,
runs
from test_time
@ -781,7 +781,7 @@ create view test_times_view_total as
sum(total_client_time),
sum(queries),
max(query_max),
sum(real) / sum(queries) avg_real_per_query,
sum(real) / if(sum(queries) > 0, sum(queries), 1) avg_real_per_query,
min(query_min),
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
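The `if()` guard introduced above avoids a division by zero when a test reports no queries. A minimal standalone sketch of the same pattern (column names illustrative):

``` sql
-- With sum(queries) = 0, plain division would yield inf/nan in the report;
-- if() substitutes a denominator of 1 so the view stays well-formed.
SELECT sum(elapsed) / if(sum(queries) > 0, sum(queries), 1) AS avg_real_per_query
FROM (SELECT 0 AS queries, 1.5 AS elapsed);
```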

View File

@ -263,8 +263,17 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
try:
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
@ -312,7 +321,7 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id)
except Exception as e:
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
@ -389,7 +398,7 @@ for query_index in queries_to_run:
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
except Exception as e:
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
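The pattern these hunks converge on (catch only driver errors, prepend the query id, re-raise) can be sketched in isolation; the helper name is hypothetical and mirrors the script's use of `clickhouse_driver`:

```python
from clickhouse_driver import Client, errors

def execute_with_id(conn: Client, query: str, query_id: str):
    """Run a query, tagging any driver error with the query id."""
    try:
        return conn.execute(query, query_id=query_id)
    except errors.Error as e:
        # Add the query id to the exception to make debugging easier.
        e.args = (query_id, *e.args)
        e.message = query_id + ': ' + str(e.message)
        raise
```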

View File

@ -16,6 +16,14 @@ while true; do
done
set -e
echo "Configure to use Yandex dockerhub-proxy"
cat > /etc/docker/daemon.json << EOF
{
"insecure-registries": ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
"registry-mirrors": ["dockerhub-proxy.sas.yp-c.yandex.net:5000"]
}
EOF
echo "Start tests"
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse
export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse
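A quick way to confirm the daemon picked up the mirror written above (a hedged sketch; assumes systemd and a docker CLI with Go-template support):

```bash
systemctl restart docker
# Should print the mirror URL from /etc/docker/daemon.json.
docker info --format '{{.RegistryConfig.Mirrors}}'
```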

View File

@ -6,7 +6,7 @@ toc_title: Atomic
# Atomic {#atomic}
It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default.
It supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. `Atomic` database engine is used by default.
## Creating a Database {#creating-a-database}
@ -14,4 +14,4 @@ It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHAN
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/atomic/) <!--hide-->

View File

@ -44,9 +44,15 @@ Columns:
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Hash value that is identical for queries differing only in the values of literals.
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query completed successfully.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- 1 — Query was initiated by the client.
@ -73,69 +79,98 @@ Columns:
- 0 — The query was launched from the TCP interface.
- 1 — `GET` method was used.
- 2 — `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` HTTP header passed in the HTTP request.
- `http_referer` ([String](../../sql-reference/data-types/string.md)) — The `Referer` HTTP header passed in the HTTP request (contains an absolute or partial address of the page making the query).
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — The `X-Forwarded-For` HTTP header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to an arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their descriptions can be found in the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions` used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate function combinators` used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines` used during query execution.
- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families` used during query execution.
- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries` used during query execution.
- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats` used during query execution.
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions` used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages` used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions` used during query execution.
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 \G
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 33452
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']
Settings.Values: ['0','random','1','10000000000','1']
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 37486
initial_user: default
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 37486
interface: 1
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
```
**See Also**
@ -143,4 +178,3 @@ Settings.Values: ['0','random','1','10000000000','1']
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) <!--hide-->

View File

@ -853,7 +853,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | &nbsp; 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
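The space padding of `%e` (made explicit with `&nbsp;` above) is easy to verify with a query, for example:

``` sql
SELECT formatDateTime(toDate('2018-01-02'), '%e');
-- returns ' 2' (space-padded), not '2'
```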

View File

@ -47,6 +47,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
Creates a table with the same result as the specified [table function](../../../sql-reference/table-functions/index.md#table-functions). The created table also works the same way as the corresponding table function.
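A minimal sketch with a hypothetical table name: the created table wraps the `numbers` table function and can be queried like any other table.

``` sql
CREATE TABLE first_hundred AS numbers(100);
SELECT count() FROM first_hundred; -- 100
```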
### From SELECT query {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
```

View File

@ -62,7 +62,7 @@ Note that materialized view is influenced by [optimize_on_insert](../../../opera
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There isn't a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view), although `DROP TABLE` works for views as well.
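For example (view name hypothetical):

``` sql
DROP VIEW IF EXISTS my_view;
```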
## Live View (Experimental) {#live-view}

View File

@ -0,0 +1,17 @@
---
toc_priority: 32
toc_title: Atomic
---
# Atomic {#atomic}
It supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. The `Atomic` database engine is used by default.
## Creating a Database {#creating-a-database}
```sql
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/ru/engines/database-engines/atomic/) <!--hide-->

View File

@ -8,7 +8,7 @@ toc_title: "Введение"
Database engines allow you to work with tables.
By default, ClickHouse uses its own database engine, which supports configurable [table engines](../../engines/database-engines/index.md) and an [SQL dialect](../../engines/database-engines/index.md).
By default, ClickHouse uses the [Atomic](../../engines/database-engines/atomic.md) database engine. It supports configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md).
The following database engines are also available:

View File

@ -103,7 +103,11 @@ toc_title: "Визуальные интерфейсы от сторонних р
[xeus-clickhouse](https://github.com/wangfenjin/xeus-clickhouse) is a Jupyter kernel for ClickHouse that supports querying ClickHouse data with SQL from Jupyter.
## Commercial {#kommercheskie}
### MindsDB Studio {#mindsdb}
[MindsDB](https://mindsdb.com/) is an open-source product that implements an artificial intelligence (AI) layer for various DBMSs, including ClickHouse. MindsDB streamlines the creation, training, and deployment of state-of-the-art machine learning models. Its graphical interface, MindsDB Studio, lets you train new models on database data, interpret the models' predictions, identify potential data biases, and visualize and evaluate model reliability using the Explainable AI feature, so you can adapt and tune your machine learning models faster.
## Commercial {#commercial}
### DataGrip {#datagrip}

View File

@ -69,6 +69,9 @@ toc_title: "Библиотеки для интеграции от сторонн
- Geo
- [MaxMind](https://dev.maxmind.com/geoip/)
- [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)
- AutoML
- [MindsDB](https://mindsdb.com/)
- [MindsDB](https://github.com/mindsdb/mindsdb) - a predictive analytics and artificial intelligence layer for the ClickHouse DBMS.
## Ecosystems Around Programming Languages {#ekosistemy-vokrug-iazykov-programmirovaniia}

View File

@ -27,7 +27,7 @@ toc_title: "Системные таблицы"
- `database` — the database the system table belongs to. This option is now deprecated; all system tables reside in the `system` database.
- `table` — the table to write the data to.
- `partition_by` — the [partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md).
- `ttl` — the table's [TTL](../../sql-reference/statements/alter/ttl.md).
- `ttl` — the [TTL](../../sql-reference/statements/alter/ttl.md) of the records in the table.
- `flush_interval_milliseconds` — the interval for flushing data to disk, in milliseconds.
- `engine` — the full engine definition (starting with `ENGINE =`) with its parameters. This option conflicts with `partition_by` and `ttl`: if both are specified, the server returns an error and terminates.
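Taken together, these options form an override like the following sketch for `system.query_log` in the server config (values illustrative; `partition_by` is used here, so `engine` is omitted):

```xml
<query_log>
    <database>system</database>
    <table>query_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
```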

View File

@ -44,9 +44,15 @@ ClickHouse не удаляет данные из таблица автомати
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store the query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Hash value that is identical for queries differing only in the values of literals.
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Exception code.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message, if the query ended with an exception.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Exception code.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query completed successfully.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- 1 — Query was initiated by the client.
@ -74,72 +80,101 @@ ClickHouse не удаляет данные из таблица автомати
- 1 — `GET`.
- 2 — `POST`.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` HTTP header.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `http_referer` ([String](../../sql-reference/data-types/string.md)) — The `Referer` HTTP header (contains the full or partial address of the page the query was made from).
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — The `X-Forwarded-For` HTTP header.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads participating in query execution.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. An arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if no comment is set.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of the metrics listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging of setting changes, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of the settings listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions` used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate function combinators` used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines` used during query execution.
- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families` used during query execution.
- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionary sources` used during query execution.
- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats` used during query execution.
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions` used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table engines` used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions` used during query execution.
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 \G
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 33452
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']
Settings.Values: ['0','random','1','10000000000','1']
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 37486
initial_user: default
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 37486
interface: 1
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
```
**See Also**
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
[Original article](https://clickhouse.tech/docs/ru/operations/system_tables/query_log) <!--hide-->

View File

@ -866,7 +866,7 @@ formatDateTime(Time, Format\[, Timezone\])
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | &nbsp; 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for the ISO week number, based on the week-based year [defined by the ISO 8601](https://ru.wikipedia.org/wiki/ISO_8601) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601 | 18 |
@ -877,6 +877,7 @@ formatDateTime(Time, Format\[, Timezone\])
| %M | minute, zero-padded (00-59) | 33 |
| %n | newline character ('') | |
| %p | AM or PM designation | PM |
| %Q | quarter (1-4) | 1 |
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
| %S | second, zero-padded (00-59) | 44 |
| %t | horizontal-tab character ('') | |
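The new `%Q` specifier can be checked with a query, for example:

``` sql
SELECT formatDateTime(toDate('2010-01-04'), '%Q');
-- returns '1' (first quarter)
```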

View File

@ -5,7 +5,11 @@ toc_title: "Таблица"
# CREATE TABLE {#create-table-query}
The `CREATE TABLE` query can have several forms.
The `CREATE TABLE` query can have several forms, which are used depending on the context and the task at hand.
By default, tables are created only on the current server. Distributed DDL queries use the `ON CLUSTER` clause, which is [described separately](../../../sql-reference/distributed-ddl.md).
## Syntax Forms {#syntax-forms}
### With Explicit Schema {#with-explicit-schema}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -23,17 +27,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Expressions for default values can also be specified; see below.
If necessary, a [primary key](#primary-key) with one or more key expressions can be specified.
### With a Schema Similar to Another Table {#with-a-schema-similar-to-other-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine]
```
Creates a table with the same structure as another table. You can specify a different engine for the table; if no engine is specified, the same engine is used as for the `db2.name2` table.
### From a Table Function {#from-a-table-function}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
```
Creates a table with the same structure and data as the result of the specified table function. The created table works the same way as the corresponding table function.
Creates a table with the same structure and data as the result of the corresponding table function.
### From a SELECT Query {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
@ -53,7 +63,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
See also the [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting.
### Default Values {#create-default-values}
## Default Values {#create-default-values}
The column description can specify a default value expression of one of the following kinds:
`DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
@ -67,16 +77,22 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
A default expression can be an arbitrary expression over the table's constants and columns. When the table structure is created or altered, the expressions are checked for cycles. On INSERT, the expressions are checked for resolvability: all the columns they can be computed from must have been passed.
### DEFAULT {#default}
`DEFAULT expr`
An ordinary default value. If the INSERT query does not specify the corresponding column, it is filled in by computing the corresponding expression.
### MATERIALIZED {#materialized}
`MATERIALIZED expr`
A materialized expression. Such a column cannot be specified in an INSERT, i.e. it is always computed.
In an INSERT without a column list, such columns are not considered.
This column is also not substituted when using an asterisk in a SELECT query. This preserves the invariant that a dump obtained with `SELECT *` can be inserted back into the table with an INSERT without a column list.
### ALIAS {#alias}
`ALIAS expr`
An alias. Such a column is not stored in the table at all.
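A minimal sketch combining the three kinds of defaults described above (table and column names illustrative):

``` sql
CREATE TABLE defaults_example
(
    id UInt64,
    created DateTime DEFAULT now(),                 -- filled in when omitted from INSERT
    created_date Date MATERIALIZED toDate(created), -- always computed, hidden from SELECT *
    created_year UInt16 ALIAS toYear(created)       -- computed on read, not stored
)
ENGINE = MergeTree ORDER BY id;
```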
@ -118,7 +134,7 @@ PRIMARY KEY(expr1[, expr2,...]);
!!! warning "Warning"
You cannot combine both methods in one query.
### Constraints {#constraints}
## Constraints {#constraints}
Along with column declarations, you can declare constraints on the values in the table's columns:
@ -136,11 +152,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Adding a large number of constraints can negatively affect the performance of `INSERT` queries.
### TTL Expression {#vyrazhenie-dlia-ttl}
## TTL Expression {#vyrazhenie-dlia-ttl}
Defines how long values are stored. It can only be specified for MergeTree-family tables. For details, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
### Column Compression Codecs {#codecs}
## Column Compression Codecs {#codecs}
By default, ClickHouse applies the compression method defined in the [server configuration](../../../operations/server-configuration-parameters/settings.md) to the column. You can also define a compression method for each individual column in the `CREATE TABLE` query.
@ -182,7 +198,18 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default);
ClickHouse supports general-purpose codecs and specialized codecs.
#### Specialized Codecs {#create-query-specialized-codecs}
### General-Purpose Codecs {#create-query-common-purpose-codecs}
Codecs:
- `NONE` — no compression.
- `LZ4` — the [lossless data compression algorithm](https://github.com/lz4/lz4) used by default. Applies fast LZ4 compression.
- `LZ4HC[(level)]` — the LZ4 HC (high compression) algorithm with a configurable compression level. The default level is 9. Setting `level <= 0` applies the default level. Possible compression levels: \[1, 12\]. Recommended level range: \[4, 9\].
- `ZSTD[(level)]` — the [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with a configurable compression `level`. Possible compression levels: \[1, 22\]. Default level: 1.
High compression levels are useful for asymmetric scenarios such as "compress once, decompress many times". Higher levels mean better compression at the cost of higher CPU usage.
### Specialized Codecs {#create-query-specialized-codecs}
These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves; instead, they prepare the data so that general-purpose codecs can compress it more effectively than unprepared data.
@ -203,18 +230,6 @@ CREATE TABLE codec_example
)
ENGINE = MergeTree()
```
#### General-Purpose Codecs {#create-query-common-purpose-codecs}
Codecs:
- `NONE` — no compression.
- `LZ4` — the [lossless data compression algorithm](https://github.com/lz4/lz4) used by default. Applies fast LZ4 compression.
- `LZ4HC[(level)]` — the LZ4 HC (high compression) algorithm with a configurable compression level. The default level is 9. Setting `level <= 0` applies the default level. Possible compression levels: \[1, 12\]. Recommended level range: \[4, 9\].
- `ZSTD[(level)]` — the [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with a configurable compression `level`. Possible compression levels: \[1, 22\]. Default level: 1.
High compression levels are useful for asymmetric scenarios such as "compress once, decompress many times". Higher compression levels mean better compression at the cost of higher CPU usage.
## Temporary Tables {#vremennye-tablitsy}
ClickHouse supports temporary tables with the following characteristics:
@ -241,6 +256,77 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
Instead of temporary tables, you can use ordinary tables with [ENGINE = Memory](../../../engines/table-engines/special/memory.md).
## REPLACE TABLE {#replace-table-query}
The `REPLACE` query lets you partially change a table (its structure or data).
!!!note "Note"
These queries are supported only by the [Atomic](../../../engines/database-engines/atomic.md) database engine.
To delete part of the data from a table, you can create a new table, insert into it the data you want to keep from the old table (selecting it with a `SELECT` query), then drop the old table and rename the new one to the old name:
```sql
CREATE TABLE myNewTable AS myOldTable;
INSERT INTO myNewTable SELECT * FROM myOldTable WHERE CounterID <12345;
DROP TABLE myOldTable;
RENAME TABLE myNewTable TO myOldTable;
```
Instead of the operations above, you can use a single query:
```sql
REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
```
### Syntax
`{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name`
Any form of the `CREATE` query syntax can be used here. A `REPLACE` query for a non-existent table causes an error.
### Examples:
Consider the following table:
```sql
CREATE DATABASE base ENGINE = Atomic;
CREATE OR REPLACE TABLE base.t1 (n UInt64, s String) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (1, 'test');
SELECT * FROM base.t1;
```
```text
┌─n─┬─s────┐
│ 1 │ test │
└───┴──────┘
```
Use a `REPLACE` query to clear all data:
```sql
CREATE OR REPLACE TABLE base.t1 (n UInt64, s Nullable(String)) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (2, null);
SELECT * FROM base.t1;
```
```text
┌─n─┬─s──┐
│ 2 │ \N │
└───┴────┘
```
Use a `REPLACE` query to change the table structure:
```sql
REPLACE TABLE base.t1 (n UInt64) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (3);
SELECT * FROM base.t1;
```
```text
┌─n─┐
│ 3 │
└───┘
```
<!--hide-->

View File

@ -60,5 +60,5 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Views look the same as normal tables. For example, they are listed in the result of a `SHOW TABLES` query.
There is no separate query for deleting views. To delete a view, use `DROP TABLE`.
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view), although `DROP TABLE` also works for views.

View File

@ -5,7 +5,7 @@ toc_title: mysql
# mysql {#mysql}
Allows `SELECT` queries to be performed on data stored on a remote MySQL server.
Allows `SELECT` and `INSERT` queries to be performed on data stored on a remote MySQL server.
**Syntax**
@ -29,9 +29,10 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
- `0` - the query is executed as `INSERT INTO`.
- `1` - the query is executed as `REPLACE INTO`.
- `on_duplicate_clause` — the `ON DUPLICATE KEY on_duplicate_clause` expression appended to the `INSERT` query. It can only be passed with `replace_query = 0` (passing `replace_query = 1` and `on_duplicate_clause` at the same time raises an exception).
- `on_duplicate_clause` — the `ON DUPLICATE KEY on_duplicate_clause` expression appended to the `INSERT` query. It can only be passed with `replace_query = 0` (passing `replace_query = 1` and `on_duplicate_clause` at the same time raises an exception).
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1;`
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`.
For the expressions that can be used as `on_duplicate_clause` in the `ON DUPLICATE KEY` clause, see the [MySQL documentation](http://www.mysql.ru/docs/).
Simple `WHERE` conditions such as `=, !=, >, >=, <, <=` are executed on the MySQL server side.
@ -42,7 +43,7 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
A table object with the same columns as the source MySQL table.
!!! note "Note"
To distinguish the `mysql (...)` table function in an `INSERT` query from a table name with a list of column names, use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
To distinguish the `mysql (...)` table function in an `INSERT` query from a table name with a list of columns, use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
**Examples**
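For instance (connection parameters illustrative; the `FUNCTION` keyword disambiguates the table function in an `INSERT`):

``` sql
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'simple_table', 'user', 'password')
    (id, name) VALUES (2, 'two');
SELECT * FROM mysql('localhost:3306', 'test', 'simple_table', 'user', 'password');
```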

View File

@ -51,5 +51,5 @@ The easiest way to see the result is to use `--livereload=8888` argument of buil
At the moment there's no easy way to do just that, but you can consider:
- Hitting the “Watch” button at the top of the GitHub web interface to learn about changes as early as possible, even during a pull request. An alternative is the `#github-activity` channel of the [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/enQtOTUzMjM4ODQwNTc5LWJmMjE3Yjc2YmI1ZDBlZmI4ZTc3OWY3ZTIwYTljYzY4MzBlODM3YzBjZTc1YmYyODRlZTJkYTgzYzBiNTA2Yjk).
- Hitting the “Watch” button at the top of the GitHub web interface to learn about changes as early as possible, even during a pull request. An alternative is the `#github-activity` channel of the [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA).
- Some search engines let you subscribe to changes on a specific website via email, and you can opt in to that for https://clickhouse.tech.

View File

@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2
htmlmin==0.1.12
idna==2.10
Jinja2==2.11.2
Jinja2>=2.11.3
jinja2-highlight==0.6.1
jsmin==2.2.2
livereload==2.6.2

View File

@ -1,5 +1,5 @@
---
toc_folder_title: Interfaces
toc_folder_title: 接口
toc_priority: 14
toc_title: 客户端
---

View File

@ -17,7 +17,7 @@ toc_title: ClickHouse的特性
Some column-oriented DBMSs (for example, InfiniDB CE and MonetDB) do not use data compression. However, data compression plays a key role in achieving excellent performance.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create/table.md#create-query-specialized-codecs) for specific kinds of data, which allows ClickHouse to compete with and outperform more niche databases, such as time-series databases.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allows ClickHouse to compete with and outperform more niche databases, such as time-series databases.
## Disk Storage of Data {#shu-ju-de-ci-pan-cun-chu}

View File

@ -1,7 +1,5 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5BFC\u8A00"
toc_folder_title: 引言
toc_priority: 1
---

View File

@ -1,7 +1,7 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u53D1\u8A00"
toc_folder_title: "\u8BED\u53E5"
toc_priority: 31
---

View File

@ -1,4 +1,5 @@
---
toc_folder_title: 表函数
toc_priority: 34
toc_title: "\u5BFC\u8A00"
---

View File

@ -188,6 +188,7 @@ add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
add_subdirectory (git-import)
add_subdirectory (bash-completion)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)

View File

@ -0,0 +1 @@
add_subdirectory(completions)

View File

@ -0,0 +1,28 @@
macro(configure_bash_completion)
set(out "/usr/share/bash-completion/completions")
# find_program(<VAR> <name>): look up pkg-config and store its path in PKG_CONFIG_BIN
find_program(PKG_CONFIG_BIN pkg-config)
if (PKG_CONFIG_BIN)
execute_process(
COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion
OUTPUT_VARIABLE pkg_config_out
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Keep the default path if pkg-config did not report a completions dir
if (pkg_config_out)
set(out "${pkg_config_out}")
endif()
endif()
string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}")
message(STATUS "bash_completion will be written to ${out}")
endmacro()
configure_bash_completion()
foreach (name
# set of functions
clickhouse-bootstrap
# binaries that accept settings as command line argument
clickhouse-client
clickhouse-local
clickhouse-benchmark
clickhouse
)
install(FILES ${name} DESTINATION ${out})
endforeach()

View File

@ -0,0 +1,43 @@
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
function _clickhouse_get_utils()
{
local cmd=$1 && shift
"$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }'
}
function _complete_for_clickhouse_entrypoint_bin()
{
local cur prev cword words
eval local cmd="$( _clickhouse_quote "$1" )"
_clickhouse_bin_exist "$cmd" || return 0
COMPREPLY=()
_get_comp_words_by_ref cur prev cword words
local util="$cur"
# complete the util name until it is finished
if [[ $cword -lt 2 ]]; then
COMPREPLY=( $(compgen -W "$(_clickhouse_get_utils "$cmd")" -- "$cur") )
return
fi
util="${words[1]}"
case "$prev" in
-C|--config-file|--config)
return
;;
# Argh... This looks like a bash bug...
# Redirections are passed to the completion function
# although it is managed by the shell directly...
'<'|'>'|'>>'|[12]'>'|[12]'>>')
return
;;
esac
COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd" "$util")" -- "$cur") )
return 0
}
_complete_clickhouse_generic clickhouse _complete_for_clickhouse_entrypoint_bin

View File

@ -0,0 +1,2 @@
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-benchmark

View File

@ -0,0 +1,81 @@
#
# bash autocompletion that can work with:
# a) the --help output of a program
#
# Also you may like:
# $ bind "set completion-ignore-case on"
# $ bind "set show-all-if-ambiguous on"
#
# It uses bash-completion dynamic loader.
# Known to work with bash 3.* with programmable completion and extended
# pattern matching enabled (use 'shopt -s extglob progcomp' to enable
# these if they are not already enabled).
shopt -s extglob
export _CLICKHOUSE_COMPLETION_LOADED=1
function _clickhouse_bin_exist()
{ [ -x "$1" ] || command -v "$1" >& /dev/null; }
function _clickhouse_quote()
{
local quoted=${1//\'/\'\\\'\'};
printf "'%s'" "$quoted"
}
# Extract every option (everything that starts with "-") from the --help dialog.
function _clickhouse_get_options()
{
"$@" --help 2>&1 | awk -F '[ ,=<>]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 0, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
}
function _complete_for_clickhouse_generic_bin()
{
local cur prev
eval local cmd="$( _clickhouse_quote "$1" )"
_clickhouse_bin_exist "$cmd" || return 0
COMPREPLY=()
_get_comp_words_by_ref cur prev
case "$prev" in
-C|--config-file|--config)
return
;;
# Argh... This looks like a bash bug...
# Redirections are passed to the completion function
# although it is managed by the shell directly...
'<'|'>'|'>>'|[12]'>'|[12]'>>')
return
;;
esac
COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd")" -- "$cur") )
return 0
}
function _complete_clickhouse_generic()
{
local bin=$1 && shift
local f=${1:-_complete_for_clickhouse_generic_bin}
local o=(
-o default
-o bashdefault
-o nospace
-F "$f"
"$bin"
)
complete "${o[@]}"
}
function _complete_clickhouse_bootstrap_main()
{
local runtime=/usr/share/bash-completion/bash_completion
if ! type _get_comp_words_by_ref >& /dev/null && [[ -f $runtime ]]; then
source $runtime
fi
type _get_comp_words_by_ref >& /dev/null || return 0
}
_complete_clickhouse_bootstrap_main "$@"
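A hedged usage sketch, assuming the install path from the CMake rule above:

```bash
# Load the bootstrap helpers and a per-binary completion, then press TAB:
source /usr/share/bash-completion/completions/clickhouse-bootstrap
source /usr/share/bash-completion/completions/clickhouse-client
# `clickhouse-client --ma<TAB>` now completes options parsed from --help.
```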

View File

@ -0,0 +1,2 @@
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-client

View File

@ -0,0 +1,2 @@
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-local

View File

@ -47,6 +47,7 @@
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/ExpressionJIT.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/System/attachSystemTables.h>
@ -830,8 +831,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500);
if (compiled_expression_cache_size)
global_context->setCompiledExpressionCache(compiled_expression_cache_size);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
#endif
/// Set path for format schema files

View File

@ -38,6 +38,16 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
HedgedConnectionsFactory::~HedgedConnectionsFactory()
{
/// Stop anything that may be in progress,
/// to avoid interfering with subsequent connections.
///
/// I.e. some replicas may be in the establishing state,
/// this means that the hedged connection is waiting for TablesStatusResponse,
/// and if the connection is not canceled,
/// then the next user of the connection will get TablesStatusResponse,
/// while this is not the expected packet.
stopChoosingReplicas();
pool->updateSharedError(shuffled_pools);
}

View File

@ -366,6 +366,9 @@ class IColumn;
M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \
M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \
\
M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \
M(Int64, postgresql_connection_pool_wait_timeout, -1, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \
\
M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \
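Like other entries in this macro list, the new pool settings are exposed as ordinary user settings; a hedged example of overriding them per session (values illustrative):

``` sql
SET postgresql_connection_pool_size = 32;
SET postgresql_connection_pool_wait_timeout = 5000;
```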

View File

@ -14,7 +14,7 @@ AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
: output(output_), header(header_)
{
auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag));
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context_));
}
void AddingDefaultBlockOutputStream::write(const Block & block)

View File

@ -174,7 +174,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
if (dag)
{
auto actions = std::make_shared<ExpressionActions>(std::move(dag));
auto actions = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context));
actions->execute(evaluate_block);
}

View File

@ -28,13 +28,13 @@ namespace ErrorCodes
}
PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
ConnectionPtr connection_,
PostgreSQLConnectionHolderPtr connection_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_)
: query_str(query_str_)
, max_block_size(max_block_size_)
, connection(connection_)
, connection(std::move(connection_))
{
description.init(sample_block);
for (const auto idx : ext::range(0, description.sample_block.columns()))
@ -48,7 +48,7 @@ PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
void PostgreSQLBlockInputStream::readPrefix()
{
tx = std::make_unique<pqxx::read_transaction>(*connection);
tx = std::make_unique<pqxx::read_transaction>(connection->conn());
stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}

View File

@ -9,18 +9,17 @@
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <pqxx/pqxx>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
namespace DB
{
using ConnectionPtr = std::shared_ptr<pqxx::connection>;
class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
PostgreSQLBlockInputStream(
ConnectionPtr connection_,
PostgreSQLConnectionHolderPtr connection_,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_);
@ -47,7 +46,7 @@ private:
const UInt64 max_block_size;
ExternalResultDescription description;
ConnectionPtr connection;
PostgreSQLConnectionHolderPtr connection;
std::unique_ptr<pqxx::read_transaction> tx;
std::unique_ptr<pqxx::stream_from> stream;

View File

@ -36,7 +36,7 @@
#if USE_LIBPQXX
#include <Databases/PostgreSQL/DatabasePostgreSQL.h> // Y_IGNORE
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
#endif
namespace DB
@ -246,11 +246,15 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
auto parsed_host_port = parseAddress(host_port, 5432);
/// no connection is made here
auto connection = std::make_shared<PostgreSQLConnection>(
postgres_database_name, parsed_host_port.first, parsed_host_port.second, username, password);
auto connection_pool = std::make_shared<PostgreSQLConnectionPool>(
postgres_database_name,
parsed_host_port.first, parsed_host_port.second,
username, password,
context.getSettingsRef().postgresql_connection_pool_size,
context.getSettingsRef().postgresql_connection_pool_wait_timeout);
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection, use_table_cache);
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
}
#endif

View File

@ -5,7 +5,6 @@
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/StoragePostgreSQL.h>
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
@ -17,6 +16,7 @@
#include <Poco/File.h>
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
#include <Common/quoteString.h>
#include <Storages/PostgreSQL/PostgreSQLConnectionPool.h>
namespace DB
@ -40,14 +40,14 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
PostgreSQLConnectionPtr connection_,
PostgreSQLConnectionPoolPtr connection_pool_,
const bool cache_tables_)
: IDatabase(dbname_)
, global_context(context.getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, connection(std::move(connection_))
, connection_pool(std::move(connection_pool_))
, cache_tables(cache_tables_)
{
cleaner_task = context.getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
@ -90,7 +90,8 @@ std::unordered_set<std::string> DatabasePostgreSQL::fetchTablesList() const
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
pqxx::read_transaction tx(*connection->conn());
auto connection = connection_pool->get();
pqxx::read_transaction tx(connection->conn());
for (auto table_name : tx.stream<std::string>(query))
tables.insert(std::get<0>(table_name));
@ -108,7 +109,8 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
"PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name);
}
pqxx::nontransaction tx(*connection->conn());
auto connection = connection_pool->get();
pqxx::nontransaction tx(connection->conn());
try
{
@ -163,13 +165,13 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, const Conte
return StoragePtr{};
auto use_nulls = context.getSettingsRef().external_table_functions_use_nulls;
auto columns = fetchPostgreSQLTableStructure(connection->conn(), doubleQuoteString(table_name), use_nulls);
auto columns = fetchPostgreSQLTableStructure(connection_pool->get(), doubleQuoteString(table_name), use_nulls);
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name), table_name, std::make_shared<PostgreSQLConnection>(*connection),
StorageID(database_name, table_name), table_name, std::make_shared<PostgreSQLConnectionPool>(*connection_pool),
ColumnsDescription{*columns}, ConstraintsDescription{}, context);
if (cache_tables)

View File

@ -15,8 +15,8 @@ namespace DB
{
class Context;
class PostgreSQLConnection;
using PostgreSQLConnectionPtr = std::shared_ptr<PostgreSQLConnection>;
class PostgreSQLConnectionPool;
using PostgreSQLConnectionPoolPtr = std::shared_ptr<PostgreSQLConnectionPool>;
/** Real-time access to table list and table structure from remote PostgreSQL.
@ -34,7 +34,7 @@ public:
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
PostgreSQLConnectionPtr connection_,
PostgreSQLConnectionPoolPtr connection_pool_,
const bool cache_tables_);
String getEngineName() const override { return "PostgreSQL"; }
@ -72,7 +72,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
PostgreSQLConnectionPtr connection;
PostgreSQLConnectionPoolPtr connection_pool;
const bool cache_tables;
mutable Tables cached_tables;

View File

@ -94,7 +94,7 @@ static DataTypePtr convertPostgreSQLDataType(std::string & type, bool is_nullabl
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls)
PostgreSQLConnectionHolderPtr connection, const String & postgres_table_name, bool use_nulls)
{
auto columns = NamesAndTypesList();
@ -113,7 +113,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
try
{
pqxx::read_transaction tx(*connection);
pqxx::read_transaction tx(connection->conn());
pqxx::stream_from stream(tx, pqxx::from_query, std::string_view(query));
std::tuple<std::string, std::string, std::string, uint16_t> row;
@ -133,7 +133,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
{
throw Exception(fmt::format(
"PostgreSQL table {}.{} does not exist",
connection->dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
connection->conn().dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
}
catch (Exception & e)
{

View File

@ -12,7 +12,7 @@ namespace DB
{
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls);
PostgreSQLConnectionHolderPtr connection, const String & postgres_table_name, bool use_nulls);
}

View File

@ -384,42 +384,13 @@ void HashedDictionary::loadData()
{
if (!source_ptr->hasUpdateField())
{
/// atomic, since the progress callback is called in parallel
std::atomic<uint64_t> new_size = 0;
auto stream = source_ptr->loadAll();
/// Preallocation can be used only when we know the number of rows in advance; for this we need:
/// - a ClickHouse source
/// - no filtering (i.e. no <where>), since filtering can discard
/// too many rows, and we may end up allocating memory that will
/// never be used.
bool preallocate = false;
if (const auto & clickhouse_source = dynamic_cast<ClickHouseDictionarySource *>(source_ptr.get()))
{
if (!clickhouse_source->hasWhere())
preallocate = true;
}
if (preallocate)
{
stream->setProgressCallback([&new_size](const Progress & progress)
{
new_size += progress.total_rows_to_read;
});
}
stream->readPrefix();
while (const auto block = stream->read())
{
if (new_size)
{
size_t current_new_size = new_size.exchange(0);
if (current_new_size)
resize(current_new_size);
}
else
resize(block.rows());
resize(block.rows());
blockToAttributes(block);
}
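
With the preallocation heuristic removed, each block now simply grows the table by its own row count. The same idea, with std::unordered_map standing in for the dictionary's hash table:

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <utility>
#include <vector>

using Block = std::vector<std::pair<std::uint64_t, std::uint64_t>>; // (key, value) rows

int main()
{
    std::vector<Block> stream = {{{1, 10}, {2, 20}}, {{3, 30}}};
    std::unordered_map<std::uint64_t, std::uint64_t> table;

    for (const auto & block : stream)
    {
        // resize(block.rows()): reserve room for this block's rows only,
        // instead of trying to guess the total row count up front.
        table.reserve(table.size() + block.size());
        for (const auto & [key, value] : block)
            table[key] = value;
    }

    std::cout << table.size() << " entries loaded\n";
}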

View File

@ -1,4 +1,8 @@
#include "PolygonDictionary.h"
#include <numeric>
#include <cmath>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
@ -8,8 +12,6 @@
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypesDecimal.h>
#include <numeric>
namespace DB
{
@ -35,63 +37,9 @@ IPolygonDictionary::IPolygonDictionary(
, input_type(input_type_)
, point_type(point_type_)
{
createAttributes();
setup();
loadData();
}
std::string IPolygonDictionary::getTypeName() const
{
return "Polygon";
}
std::string IPolygonDictionary::getKeyDescription() const
{
return dict_struct.getKeyDescription();
}
size_t IPolygonDictionary::getBytesAllocated() const
{
return bytes_allocated;
}
size_t IPolygonDictionary::getQueryCount() const
{
return query_count.load(std::memory_order_relaxed);
}
double IPolygonDictionary::getHitRate() const
{
return 1.0;
}
size_t IPolygonDictionary::getElementCount() const
{
return element_count;
}
double IPolygonDictionary::getLoadFactor() const
{
return 1.0;
}
const IDictionarySource * IPolygonDictionary::getSource() const
{
return source_ptr.get();
}
const DictionaryLifetime & IPolygonDictionary::getLifetime() const
{
return dict_lifetime;
}
const DictionaryStructure & IPolygonDictionary::getStructure() const
{
return dict_struct;
}
bool IPolygonDictionary::isInjective(const std::string &) const
{
return false;
calculateBytesAllocated();
}
ColumnPtr IPolygonDictionary::getColumn(
@ -101,50 +49,101 @@ ColumnPtr IPolygonDictionary::getColumn(
const DataTypes &,
const ColumnPtr & default_values_column) const
{
ColumnPtr result;
const auto requested_key_points = extractPoints(key_columns);
const auto index = getAttributeIndex(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & attribute = dict_struct.getAttribute(attribute_name, result_type);
bool complex_attribute = attribute.is_nullable || attribute.is_array;
DefaultValueProvider default_value_provider(attribute.null_value, default_values_column);
auto keys_size = key_columns.front()->size();
size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
const auto & attribute_values_column = attributes[attribute_index];
auto type_call = [&](const auto &dictionary_attribute_type)
auto result = attribute_values_column->cloneEmpty();
result->reserve(requested_key_points.size());
Field row_value_to_insert;
size_t polygon_index = 0;
if (unlikely(complex_attribute))
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
const auto & null_value = std::get<AttributeType>(null_values[index]);
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<AttributeType, String>)
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
auto column_string = ColumnString::create();
auto * out = column.get();
const auto found = find(requested_key_points[requested_key_index], polygon_index);
getItemsImpl<String, StringRef>(
index,
key_columns,
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
default_value_extractor);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
attribute_values_column->get(attribute_values_index, row_value_to_insert);
}
else
row_value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
result->insert(row_value_to_insert);
}
else
}
else
{
auto type_call = [&](const auto & dictionary_attribute_type)
{
auto & out = column->getData();
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnType = std::conditional_t<
std::is_same_v<AttributeType, String>,
ColumnString,
std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<ValueType>, ColumnVector<AttributeType>>>;
getItemsImpl<AttributeType, AttributeType>(
index,
key_columns,
[&](const size_t row, const auto value) { return out[row] = value; },
default_value_extractor);
}
const auto attribute_values_column_typed = typeid_cast<const ColumnType *>(attribute_values_column.get());
if (!attribute_values_column_typed)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "An attribute type should be same as dictionary type");
result = std::move(column);
};
ColumnType & result_column_typed = static_cast<ColumnType &>(*result);
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
if constexpr (std::is_same_v<ColumnType, ColumnString>)
{
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
const auto found = find(requested_key_points[requested_key_index], polygon_index);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
auto data_to_insert = attribute_values_column->getDataAt(attribute_values_index);
result_column_typed.insertData(data_to_insert.data, data_to_insert.size);
}
else
result_column_typed.insert(default_value_provider.getDefaultValue(requested_key_index));
}
}
else
{
auto & attribute_data = attribute_values_column_typed->getData();
auto & result_data = result_column_typed.getData();
for (size_t requested_key_index = 0; requested_key_index < requested_key_points.size(); ++requested_key_index)
{
const auto found = find(requested_key_points[requested_key_index], polygon_index);
if (found)
{
size_t attribute_values_index = polygon_index_to_attribute_value_index[polygon_index];
auto & item = attribute_data[attribute_values_index];
result_data.emplace_back(item);
}
else
{
row_value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
result_data.emplace_back(row_value_to_insert.template get<NearestFieldType<ValueType>>());
}
}
}
};
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
}
query_count.fetch_add(requested_key_points.size(), std::memory_order_relaxed);
return result;
}
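
The rewritten getColumn makes a single pass over the requested points: find the containing polygon, translate the polygon index into an attribute row, or fall back to the default. A stripped-down sketch of the numeric path, with find() replaced by a trivial stand-in:

#include <cstddef>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

// Stand-in for IPolygonDictionary::find(): returns the polygon index, if any.
std::optional<std::size_t> find(double x, double y)
{
    if (x >= 0 && y >= 0) return 0;   // pretend polygon 0 covers the first quadrant
    return std::nullopt;
}

int main()
{
    std::vector<std::size_t> polygon_index_to_attribute_value_index = {2};
    std::vector<double> attribute_data = {10.0, 20.0, 30.0}; // one value per source row
    double default_value = -1.0;

    std::vector<std::pair<double, double>> points = {{1, 1}, {-1, 1}};
    std::vector<double> result_data;
    result_data.reserve(points.size());

    for (const auto & [x, y] : points)
    {
        if (auto polygon_index = find(x, y))
        {
            std::size_t row = polygon_index_to_attribute_value_index[*polygon_index];
            result_data.emplace_back(attribute_data[row]);
        }
        else
            result_data.emplace_back(default_value);
    }

    for (double v : result_data)
        std::cout << v << '\n';   // prints 30, then -1
}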
@ -156,75 +155,16 @@ BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_
throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD};
}
template <typename T>
void IPolygonDictionary::appendNullValueImpl(const Field & null_value)
void IPolygonDictionary::setup()
{
null_values.emplace_back(T(null_value.get<NearestFieldType<T>>()));
}
attributes.reserve(dict_struct.attributes.size());
void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Field & null_value)
{
switch (type)
for (const auto & attribute : dict_struct.attributes)
{
case AttributeUnderlyingType::utUInt8:
appendNullValueImpl<UInt8>(null_value);
break;
case AttributeUnderlyingType::utUInt16:
appendNullValueImpl<UInt16>(null_value);
break;
case AttributeUnderlyingType::utUInt32:
appendNullValueImpl<UInt32>(null_value);
break;
case AttributeUnderlyingType::utUInt64:
appendNullValueImpl<UInt64>(null_value);
break;
case AttributeUnderlyingType::utUInt128:
appendNullValueImpl<UInt128>(null_value);
break;
case AttributeUnderlyingType::utInt8:
appendNullValueImpl<Int8>(null_value);
break;
case AttributeUnderlyingType::utInt16:
appendNullValueImpl<Int16>(null_value);
break;
case AttributeUnderlyingType::utInt32:
appendNullValueImpl<Int32>(null_value);
break;
case AttributeUnderlyingType::utInt64:
appendNullValueImpl<Int64>(null_value);
break;
case AttributeUnderlyingType::utFloat32:
appendNullValueImpl<Float32>(null_value);
break;
case AttributeUnderlyingType::utFloat64:
appendNullValueImpl<Float64>(null_value);
break;
case AttributeUnderlyingType::utDecimal32:
appendNullValueImpl<Decimal32>(null_value);
break;
case AttributeUnderlyingType::utDecimal64:
appendNullValueImpl<Decimal64>(null_value);
break;
case AttributeUnderlyingType::utDecimal128:
appendNullValueImpl<Decimal128>(null_value);
break;
case AttributeUnderlyingType::utString:
appendNullValueImpl<String>(null_value);
break;
}
}
auto column = attribute.type->createColumn();
attributes.emplace_back(std::move(column));
void IPolygonDictionary::createAttributes()
{
attributes.resize(dict_struct.attributes.size());
for (size_t i = 0; i < dict_struct.attributes.size(); ++i)
{
const auto & attr = dict_struct.attributes[i];
attribute_index_by_name.emplace(attr.name, i);
appendNullValue(attr.underlying_type, attr.null_value);
if (attr.hierarchical)
if (attribute.hierarchical)
throw Exception{ErrorCodes::TYPE_MISMATCH,
"{}: hierarchical attributes not supported for dictionary of polygonal type",
getDictionaryID().getNameForLogs()};
@ -234,22 +174,20 @@ void IPolygonDictionary::createAttributes()
void IPolygonDictionary::blockToAttributes(const DB::Block & block)
{
const auto rows = block.rows();
element_count += rows;
size_t skip_key_column_offset = 1;
for (size_t i = 0; i < attributes.size(); ++i)
{
const auto & column = block.safeGetByPosition(i + 1);
if (attributes[i])
{
MutableColumnPtr mutated = IColumn::mutate(std::move(attributes[i]));
mutated->insertRangeFrom(*column.column, 0, column.column->size());
attributes[i] = std::move(mutated);
}
else
attributes[i] = column.column;
const auto & block_column = block.safeGetByPosition(i + skip_key_column_offset);
const auto & column = block_column.column;
attributes[i]->assumeMutable()->insertRangeFrom(*column, 0, column->size());
}
/** Multi-polygons can expand into more than one polygon per row, so reserving by row count may underestimate, but it is better than nothing. */
polygons.reserve(polygons.size() + rows);
ids.reserve(ids.size() + rows);
polygon_index_to_attribute_value_index.reserve(polygon_index_to_attribute_value_index.size() + rows);
const auto & key = block.safeGetByPosition(0).column;
extractPolygons(key);
}
@ -262,114 +200,104 @@ void IPolygonDictionary::loadData()
blockToAttributes(block);
stream->readSuffix();
std::vector<double> areas;
areas.reserve(polygons.size());
/// Correct the polygons, sort them by area, and update polygon_index_to_attribute_value_index to match the new order
PaddedPODArray<double> areas;
areas.resize_fill(polygons.size());
std::vector<std::pair<Polygon, size_t>> polygon_ids;
polygon_ids.reserve(polygons.size());
for (size_t i = 0; i < polygons.size(); ++i)
{
auto & polygon = polygons[i];
bg::correct(polygon);
areas.push_back(bg::area(polygon));
areas[i] = bg::area(polygon);
polygon_ids.emplace_back(polygon, i);
}
sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
std::sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
{
return areas[lhs.second] < areas[rhs.second];
});
std::vector<size_t> correct_ids;
correct_ids.reserve(polygon_ids.size());
for (size_t i = 0; i < polygon_ids.size(); ++i)
{
auto & polygon = polygon_ids[i];
correct_ids.emplace_back(ids[polygon.second]);
correct_ids.emplace_back(polygon_index_to_attribute_value_index[polygon.second]);
polygons[i] = polygon.first;
}
ids = correct_ids;
polygon_index_to_attribute_value_index = std::move(correct_ids);
}
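
The loop above pairs each polygon with its original position, sorts by area, and rebuilds polygon_index_to_attribute_value_index to follow the new order. The same permutation logic on plain values, as a sketch:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
    // "Areas" of three polygons and the attribute row each polygon points at.
    std::vector<double> areas = {9.0, 1.0, 4.0};
    std::vector<std::size_t> polygon_index_to_attribute_value_index = {0, 1, 2};

    std::vector<std::pair<double, std::size_t>> polygon_ids; // (polygon stand-in, old index)
    for (std::size_t i = 0; i < areas.size(); ++i)
        polygon_ids.emplace_back(areas[i], i);

    std::sort(polygon_ids.begin(), polygon_ids.end(),
              [&](const auto & lhs, const auto & rhs) { return areas[lhs.second] < areas[rhs.second]; });

    std::vector<std::size_t> correct_ids;
    correct_ids.reserve(polygon_ids.size());
    for (const auto & polygon : polygon_ids)
        correct_ids.emplace_back(polygon_index_to_attribute_value_index[polygon.second]);

    polygon_index_to_attribute_value_index = std::move(correct_ids);

    for (std::size_t id : polygon_index_to_attribute_value_index)
        std::cout << id << ' ';   // prints "1 2 0": smallest area first
    std::cout << '\n';
}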
void IPolygonDictionary::calculateBytesAllocated()
{
// TODO: Account for key.
/// The index allocated by the subclass is not counted because it takes a small part relative to attributes and polygons.
for (const auto & column : attributes)
bytes_allocated += column->allocatedBytes();
for (auto & polygon : polygons)
bytes_allocated += bg::num_points(polygon) * sizeof(Point);
}
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns & key_columns)
{
if (key_columns.size() != 2)
throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS};
throw Exception{"Expected two columns of coordinates with type Float64", ErrorCodes::BAD_ARGUMENTS};
const auto * column_x = typeid_cast<const ColumnVector<Float64>*>(key_columns[0].get());
const auto * column_y = typeid_cast<const ColumnVector<Float64>*>(key_columns[1].get());
if (!column_x || !column_y)
throw Exception{"Expected columns of Float64", ErrorCodes::TYPE_MISMATCH};
const auto rows = key_columns.front()->size();
std::vector<Point> result;
result.reserve(rows);
for (const auto row : ext::range(0, rows))
result.emplace_back(column_x->getElement(row), column_y->getElement(row));
{
auto x = column_x->getElement(row);
auto y = column_y->getElement(row);
if (isNaN(x) || isNaN(y))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"PolygonDictionary input point component must not be NaN");
if (isinf(x) || isinf(y))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"PolygonDictionary input point component must not be infinite");
result.emplace_back(x, y);
}
return result;
}
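
The validation added to extractPoints rejects NaN and infinite coordinates before they reach the geometry code. A sketch of the same checks with the standard <cmath> predicates:

#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>

std::vector<std::pair<double, double>> extractPoints(const std::vector<double> & xs,
                                                     const std::vector<double> & ys)
{
    std::vector<std::pair<double, double>> result;
    result.reserve(xs.size());
    for (std::size_t row = 0; row < xs.size(); ++row)
    {
        double x = xs[row];
        double y = ys[row];
        if (std::isnan(x) || std::isnan(y))
            throw std::invalid_argument("point component must not be NaN");
        if (std::isinf(x) || std::isinf(y))
            throw std::invalid_argument("point component must not be infinite");
        result.emplace_back(x, y);
    }
    return result;
}

int main()
{
    try
    {
        extractPoints({1.0, std::nan("")}, {2.0, 3.0});
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << "rejected: " << e.what() << '\n';
    }
}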
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
auto size = key_columns.front()->size();
auto result = ColumnUInt8::create(size);
std::vector<IPolygonDictionary::Point> points = extractPoints(key_columns);
auto result = ColumnUInt8::create(points.size());
auto& out = result->getData();
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
for (size_t i = 0; i < points.size(); ++i)
{
size_t trash = 0;
out[row] = find(pt, trash);
++row;
}
query_count.fetch_add(row, std::memory_order_relaxed);
return result;
}
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == attribute_index_by_name.end())
throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS};
return it->second;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void IPolygonDictionary::getItemsImpl(
size_t attribute_ind,
const Columns & key_columns,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto points = extractPoints(key_columns);
using ColVecType = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
using ColType = std::conditional_t<std::is_same<AttributeType, String>::value, ColumnString, ColVecType>;
const auto column = typeid_cast<const ColType *>(attributes[attribute_ind].get());
if (!column)
throw Exception{"An attribute should be a column of its type", ErrorCodes::BAD_ARGUMENTS};
for (const auto i : ext::range(0, points.size()))
{
size_t id = 0;
const auto found = find(points[i], id);
id = ids[id];
if (!found)
{
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
continue;
}
if constexpr (std::is_same<AttributeType, String>::value)
set_value(i, static_cast<OutputType>(column->getDataAt(id)));
else
set_value(i, static_cast<OutputType>(column->getElement(id)));
size_t unused_find_result = 0;
auto & point = points[i];
out[i] = find(point, unused_find_result);
}
query_count.fetch_add(points.size(), std::memory_order_relaxed);
return result;
}
namespace
@ -531,7 +459,7 @@ void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offs
void IPolygonDictionary::extractPolygons(const ColumnPtr & column)
{
Data data = {polygons, ids};
Data data = {polygons, polygon_index_to_attribute_value_index};
Offset offset;
const IColumn * points_collection = nullptr;

View File

@ -57,27 +57,25 @@ public:
InputType input_type_,
PointType point_type_);
std::string getTypeName() const override;
std::string getTypeName() const override { return "Polygon"; }
std::string getKeyDescription() const;
size_t getBytesAllocated() const override { return bytes_allocated; }
size_t getBytesAllocated() const override;
size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
size_t getQueryCount() const override;
double getHitRate() const override { return 1.0; }
double getHitRate() const override;
size_t getElementCount() const override { return attributes.empty() ? 0 : attributes.front()->size(); }
size_t getElementCount() const override;
double getLoadFactor() const override { return 1.0; }
double getLoadFactor() const override;
const IDictionarySource * getSource() const override { return source_ptr.get(); }
const IDictionarySource * getSource() const override;
const DictionaryStructure & getStructure() const override { return dict_struct; }
const DictionaryStructure & getStructure() const override;
const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
const DictionaryLifetime & getLifetime() const override;
bool isInjective(const std::string & attribute_name) const override;
bool isInjective(const std::string & attribute_name) const override { return dict_struct.getAttribute(attribute_name).injective; }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
@ -106,13 +104,9 @@ protected:
* If the function returns true, polygon_index is set to the index of a polygon containing the given point.
* Overridden in different implementations of this interface.
*/
virtual bool find(const Point & point, size_t & id) const = 0;
virtual bool find(const Point & point, size_t & polygon_index) const = 0;
std::vector<Polygon> polygons;
/** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon
* corresponding to the row in which any other attributes for this entry are located.
*/
std::vector<size_t> ids;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
@ -126,7 +120,7 @@ private:
* The polygons serving as keys are extracted into boost types.
* All other values are stored in one column per attribute.
*/
void createAttributes();
void setup();
void blockToAttributes(const Block & block);
void loadData();
@ -135,13 +129,6 @@ private:
/** Checks whether a given attribute exists and returns its index */
size_t getAttributeIndex(const std::string & attribute_name) const;
/** Helper functions to retrieve and instantiate the provided null value of an attribute.
* Since a null value is obligatory for every attribute they are simply appended to null_values defined below.
*/
template <typename T>
void appendNullValueImpl(const Field & null_value);
void appendNullValue(AttributeUnderlyingType type, const Field & value);
/** Helper function for retrieving the value of an attribute by key. */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
@ -150,32 +137,16 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
std::map<std::string, size_t> attribute_index_by_name;
/** A vector of columns storing the values of each attribute. */
Columns attributes;
/** A vector of null values corresponding to each attribute. */
std::vector<std::variant<
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>> null_values;
size_t bytes_allocated = 0;
size_t element_count = 0;
mutable std::atomic<size_t> query_count{0};
/** Since the original data may have been in the form of multi-polygons, an index is stored for each single polygon,
* pointing at the row in which any other attributes for this entry are located.
*/
std::vector<size_t> polygon_index_to_attribute_value_index;
/** Extracts a list of polygons from a column according to input_type and point_type.
* The polygons are appended to the dictionary with the corresponding ids.
*/

View File

@ -39,14 +39,14 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionarySimple::clone() const
this->point_type);
}
bool PolygonDictionarySimple::find(const Point & point, size_t & id) const
bool PolygonDictionarySimple::find(const Point & point, size_t & polygon_index) const
{
bool found = false;
for (size_t i = 0; i < polygons.size(); ++i)
{
if (bg::covered_by(point, polygons[i]))
{
id = i;
polygon_index = i;
found = true;
break;
}
@ -90,7 +90,7 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexEach::clone() con
this->max_depth);
}
bool PolygonDictionaryIndexEach::find(const Point & point, size_t & id) const
bool PolygonDictionaryIndexEach::find(const Point & point, size_t & polygon_index) const
{
const auto * cell = grid.find(point.x(), point.y());
if (cell)
@ -100,13 +100,13 @@ bool PolygonDictionaryIndexEach::find(const Point & point, size_t & id) const
size_t unused;
if (buckets[candidate].find(point, unused))
{
id = candidate;
polygon_index = candidate;
return true;
}
}
if (cell->first_covered != FinalCell::kNone)
{
id = cell->first_covered;
polygon_index = cell->first_covered;
return true;
}
}
@ -142,19 +142,19 @@ std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexCell::clone() con
this->max_depth);
}
bool PolygonDictionaryIndexCell::find(const Point & point, size_t & id) const
bool PolygonDictionaryIndexCell::find(const Point & point, size_t & polygon_index) const
{
const auto * cell = index.find(point.x(), point.y());
if (cell)
{
if (!(cell->corresponding_ids).empty() && cell->index.find(point, id))
if (!(cell->corresponding_ids).empty() && cell->index.find(point, polygon_index))
{
id = cell->corresponding_ids[id];
polygon_index = cell->corresponding_ids[polygon_index];
return true;
}
if (cell->first_covered != FinalCellWithSlabs::kNone)
{
id = cell->first_covered;
polygon_index = cell->first_covered;
return true;
}
}

View File

@ -27,7 +27,7 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
};
/** A polygon dictionary which generates a recursive grid in order to efficiently cut the number
@ -55,7 +55,7 @@ public:
static constexpr size_t kMaxDepthDefault = 5;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
std::vector<SlabsPolygonIndex> buckets;
GridRoot<FinalCell> grid;
@ -84,7 +84,7 @@ public:
static constexpr size_t kMaxDepthDefault = 5;
private:
bool find(const Point & point, size_t & id) const override;
bool find(const Point & point, size_t & polygon_index) const override;
GridRoot<FinalCellWithSlabs> index;

View File

@ -90,7 +90,6 @@ std::vector<Coord> SlabsPolygonIndex::uniqueX(const std::vector<Polygon> & polyg
std::sort(all_x.begin(), all_x.end());
all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end());
LOG_TRACE(log, "Found {} unique x coordinates", all_x.size());
return all_x;
}
@ -112,8 +111,6 @@ void SlabsPolygonIndex::indexBuild(const std::vector<Polygon> & polygons)
/** Total number of edges */
size_t m = all_edges.size();
LOG_TRACE(log, "Just sorted {} edges from all {} polygons", all_edges.size(), polygons.size());
/** Use a custom comparator to fetch edges in right_point order, as in a scanline algorithm */
auto cmp = [](const Edge & a, const Edge & b)
{
@ -180,8 +177,6 @@ void SlabsPolygonIndex::indexBuild(const std::vector<Polygon> & polygons)
}
}
}
LOG_TRACE(log, "Polygon index is built, total_index_edges = {}", total_index_edges);
}
void SlabsPolygonIndex::indexAddRing(const Ring & ring, size_t polygon_id)

View File

@ -73,7 +73,7 @@ public:
private:
/** Returns unique x coordinates among all points */
std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);
static std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);
/** Builds index described above */
void indexBuild(const std::vector<Polygon> & polygons);

View File

@ -75,27 +75,11 @@ namespace ErrorCodes
class FunctionDictHelper
{
public:
explicit FunctionDictHelper(const Context & context_) : context(context_), external_loader(context.getExternalDictionariesLoader()) {}
explicit FunctionDictHelper(const Context & context_) : context(context_) {}
std::shared_ptr<const IDictionaryBase> getDictionary(const String & dictionary_name)
{
String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
bool can_load_dictionary = external_loader.hasDictionary(resolved_name);
if (!can_load_dictionary)
{
/// If the dictionary was not found and the database was not explicitly specified,
/// we can qualify the dictionary name with the current database name.
/// This helps when the dictionary was created with DDL and lives in the current database.
if (dictionary_name.find('.') == std::string::npos)
{
String dictionary_name_with_database = context.getCurrentDatabase() + '.' + dictionary_name;
resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name_with_database);
}
}
auto dict = external_loader.getDictionary(resolved_name);
auto dict = context.getExternalDictionariesLoader().getDictionary(dictionary_name, context);
if (!access_checked)
{
@ -134,31 +118,11 @@ public:
DictionaryStructure getDictionaryStructure(const String & dictionary_name) const
{
String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
auto load_result = external_loader.getLoadResult(resolved_name);
if (!load_result.config)
{
/// If the dictionary was not found and the database was not explicitly specified,
/// we can qualify the dictionary name with the current database name.
/// This helps when the dictionary was created with DDL and lives in the current database.
if (dictionary_name.find('.') == std::string::npos)
{
String dictionary_name_with_database = context.getCurrentDatabase() + '.' + dictionary_name;
resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name_with_database);
load_result = external_loader.getLoadResult(resolved_name);
}
}
if (!load_result.config)
throw Exception("Dictionary " + backQuote(dictionary_name) + " not found", ErrorCodes::BAD_ARGUMENTS);
return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
return context.getExternalDictionariesLoader().getDictionaryStructure(dictionary_name, context);
}
const Context & context;
private:
const ExternalDictionariesLoader & external_loader;
const Context & context;
/// Access here is always granted: otherwise checkAccess() throws and access_checked is never updated.
std::atomic<bool> access_checked = false;
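
The deleted fallback qualified an unqualified dictionary name with the current database and retried the lookup; that resolution now happens inside ExternalDictionariesLoader. The retry pattern itself, as a self-contained sketch over a hypothetical catalog:

#include <iostream>
#include <optional>
#include <string>
#include <unordered_set>

// Hypothetical catalog of fully qualified dictionary names.
const std::unordered_set<std::string> catalog = {"db1.dict_a", "system.dict_b"};

std::optional<std::string> resolveDictionaryName(const std::string & name,
                                                 const std::string & current_database)
{
    if (catalog.count(name))
        return name;
    // If the name has no database part, retry with the current database prepended.
    if (name.find('.') == std::string::npos)
    {
        std::string qualified = current_database + '.' + name;
        if (catalog.count(qualified))
            return qualified;
    }
    return std::nullopt;
}

int main()
{
    std::cout << resolveDictionaryName("dict_a", "db1").value_or("not found") << '\n'; // db1.dict_a
    std::cout << resolveDictionaryName("dict_c", "db1").value_or("not found") << '\n'; // not found
}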

View File

@ -159,7 +159,8 @@ void WriteBufferFromS3::writePart()
auto outcome = client_ptr->UploadPart(req);
LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, temporary_buffer->tellp());
LOG_TRACE(
log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, req.GetContentLength());
if (outcome.IsSuccess())
{
@ -215,7 +216,7 @@ void WriteBufferFromS3::makeSinglepartUpload()
auto outcome = client_ptr->PutObject(req);
if (outcome.IsSuccess())
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}", bucket, key);
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}", bucket, key, req.GetContentLength());
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}

View File

@ -20,7 +20,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int DUPLICATE_COLUMN;
extern const int UNKNOWN_IDENTIFIER;
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
@ -32,7 +31,7 @@ namespace ErrorCodes
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
{
for (const auto & input : inputs_)
addInput(input.name, input.type, true);
index.push_back(&addInput(input.name, input.type));
}
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
@ -41,7 +40,7 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
{
if (input.column && isColumnConst(*input.column))
{
addInput(input, true);
addInput(input);
/// Here we also add the column itself.
/// This allows removing an input which is actually constant (after projection).
@ -49,49 +48,34 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
/// without any respect to the header structure. So it is a way to drop a materialized column and use
/// the constant value from the header.
/// We cannot remove such an input right now because input positions are important in some cases.
addColumn(input, true);
index.push_back(&addColumn(input));
}
else
addInput(input.name, input.type, true);
index.push_back(&addInput(input.name, input.type));
}
}
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index)
ActionsDAG::Node & ActionsDAG::addNode(Node node)
{
auto it = index.find(node.result_name);
if (it != index.end() && !can_replace && add_to_index)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
auto & res = nodes.emplace_back(std::move(node));
if (res.type == ActionType::INPUT)
inputs.emplace_back(&res);
if (add_to_index)
index.replace(&res);
return res;
}
ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
{
auto it = index.find(name);
if (it == index.end())
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return **it;
}
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace, bool add_to_index)
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
{
Node node;
node.type = ActionType::INPUT;
node.result_type = std::move(type);
node.result_name = std::move(name);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace, bool add_to_index)
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
{
Node node;
node.type = ActionType::INPUT;
@ -99,10 +83,10 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
node.result_name = std::move(column.name);
node.column = std::move(column.column);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize)
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
{
if (!column.column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
@ -113,30 +97,10 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
node.result_name = std::move(column.name);
node.column = std::move(column.column);
auto * res = &addNode(std::move(node), can_replace, !materialize);
if (materialize)
{
auto & name = res->result_name;
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
res = &addFunction(func_builder_materialize, {res}, {}, true, false);
res = &addAlias(*res, name, true);
}
return *res;
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
{
return addAlias(getNode(name), alias, can_replace);
}
ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool can_replace)
const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string alias)
{
Node node;
node.type = ActionType::ALIAS;
@ -145,13 +109,11 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
node.column = child.column;
node.children.emplace_back(&child);
return addNode(std::move(node), can_replace);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name)
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name)
{
auto & child = getNode(source_name);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get());
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
@ -167,37 +129,8 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam
const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context [[maybe_unused]],
bool can_replace)
{
const auto & all_settings = context.getSettingsRef();
settings.max_temporary_columns = all_settings.max_temporary_columns;
settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns;
#if USE_EMBEDDED_COMPILER
settings.compile_expressions = all_settings.compile_expressions;
settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression;
if (!compilation_cache)
compilation_cache = context.getCompiledExpressionCache();
#endif
Inputs children;
children.reserve(argument_names.size());
for (const auto & name : argument_names)
children.push_back(&getNode(name));
return addFunction(function, children, std::move(result_name), can_replace);
}
ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace,
bool add_to_index)
NodeRawConstPtrs children,
std::string result_name)
{
size_t num_arguments = children.size();
@ -211,7 +144,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
for (size_t i = 0; i < num_arguments; ++i)
{
auto & child = *node.children[i];
const auto & child = *node.children[i];
ColumnWithTypeAndName argument;
argument.column = child.column;
@ -229,10 +162,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.function = node.function_base->prepare(arguments);
/// If all arguments are constants and the function is suitable for execution at the 'prepare' stage, execute the function now.
/// But if we compile expressions, a compiled version of this function may be placed in the cache,
/// so we don't want to constant-fold non-deterministic functions.
if (all_const && node.function_base->isSuitableForConstantFolding()
&& (!settings.compile_expressions || node.function_base->isDeterministic()))
if (all_const && node.function_base->isSuitableForConstantFolding())
{
size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size();
auto col = node.function->execute(arguments, node.result_type, num_rows, true);
@ -277,9 +207,39 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.result_name = std::move(result_name);
return addNode(std::move(node), can_replace, add_to_index);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::findInIndex(const std::string & name) const
{
if (const auto * node = tryFindInIndex(name))
return *node;
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", name);
}
const ActionsDAG::Node * ActionsDAG::tryFindInIndex(const std::string & name) const
{
for (const auto & node : index)
if (node->result_name == name)
return node;
return nullptr;
}
void ActionsDAG::addOrReplaceInIndex(const Node & node)
{
for (auto & index_node : index)
{
if (index_node->result_name == node.result_name)
{
index_node = &node;
return;
}
}
index.push_back(&node);
}
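
With the hash-based Index gone, lookups are a linear scan over the ordered result names, and addOrReplaceInIndex overwrites the first match or appends. The same three helpers over a toy Node type, as a sketch:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct Node { std::string result_name; };
using NodeRawConstPtrs = std::vector<const Node *>;

const Node * tryFindInIndex(const NodeRawConstPtrs & index, const std::string & name)
{
    for (const auto * node : index)
        if (node->result_name == name)
            return node;
    return nullptr;
}

const Node & findInIndex(const NodeRawConstPtrs & index, const std::string & name)
{
    if (const auto * node = tryFindInIndex(index, name))
        return *node;
    throw std::runtime_error("Unknown identifier: " + name);
}

void addOrReplaceInIndex(NodeRawConstPtrs & index, const Node & node)
{
    for (auto & index_node : index)
    {
        if (index_node->result_name == node.result_name)
        {
            index_node = &node;   // replace the first node with the same name
            return;
        }
    }
    index.push_back(&node);       // otherwise append
}

int main()
{
    Node a{"a"}, b{"b"}, a2{"a"};
    NodeRawConstPtrs index;
    addOrReplaceInIndex(index, a);
    addOrReplaceInIndex(index, b);
    addOrReplaceInIndex(index, a2);                          // replaces the first "a"
    std::cout << index.size() << '\n';                       // 2
    std::cout << (&findInIndex(index, "a") == &a2) << '\n';  // 1
}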
NamesAndTypesList ActionsDAG::getRequiredColumns() const
{
@ -331,37 +291,53 @@ std::string ActionsDAG::dumpNames() const
return out.str();
}
void ActionsDAG::removeUnusedActions(const NameSet & required_names)
{
NodeRawConstPtrs required_nodes;
required_nodes.reserve(required_names.size());
NameSet added;
for (const auto & node : index)
{
if (required_names.count(node->result_name) && added.count(node->result_name) == 0)
{
required_nodes.push_back(node);
added.insert(node->result_name);
}
}
if (added.size() < required_names.size())
{
for (const auto & name : required_names)
if (added.count(name) == 0)
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames());
}
index.swap(required_nodes);
removeUnusedActions();
}
void ActionsDAG::removeUnusedActions(const Names & required_names)
{
std::unordered_set<Node *> nodes_set;
std::vector<Node *> required_nodes;
NodeRawConstPtrs required_nodes;
required_nodes.reserve(required_names.size());
std::unordered_map<std::string_view, const Node *> names_map;
for (const auto * node : index)
names_map[node->result_name] = node;
for (const auto & name : required_names)
{
auto it = index.find(name);
if (it == index.end())
auto it = names_map.find(name);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames());
"Unknown column: {}, there are only columns {}", name, dumpDAG());
if (nodes_set.insert(*it).second)
required_nodes.push_back(*it);
}
removeUnusedActions(required_nodes);
}
void ActionsDAG::removeUnusedActions(const std::vector<Node *> & required_nodes)
{
{
Index new_index;
for (auto * node : required_nodes)
new_index.insert(node);
index.swap(new_index);
required_nodes.push_back(it->second);
}
index.swap(required_nodes);
removeUnusedActions();
}
@ -370,10 +346,10 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
std::unordered_set<const Node *> visited_nodes;
std::stack<Node *> stack;
for (auto * node : index)
for (const auto * node : index)
{
visited_nodes.insert(node);
stack.push(node);
stack.push(const_cast<Node *>(node));
}
for (auto & node : nodes)
@ -406,11 +382,11 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
node->children.clear();
}
for (auto * child : node->children)
for (const auto * child : node->children)
{
if (visited_nodes.count(child) == 0)
{
stack.push(child);
stack.push(const_cast<Node *>(child));
visited_nodes.insert(child);
}
}
@ -421,22 +397,29 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
inputs.erase(it, inputs.end());
}
void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes)
void ActionsDAG::addAliases(const NamesWithAliases & aliases)
{
std::vector<Node *> required_nodes;
std::unordered_map<std::string_view, size_t> names_map;
for (size_t i = 0; i < index.size(); ++i)
names_map[index[i]->result_name] = i;
NodeRawConstPtrs required_nodes;
required_nodes.reserve(aliases.size());
for (const auto & item : aliases)
{
auto & child = getNode(item.first);
required_nodes.push_back(&child);
}
auto it = names_map.find(item.first);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", item.first, dumpNames());
result_nodes.reserve(aliases.size());
required_nodes.push_back(index[it->second]);
}
for (size_t i = 0; i < aliases.size(); ++i)
{
const auto & item = aliases[i];
auto * child = required_nodes[i];
const auto * child = required_nodes[i];
if (!item.second.empty() && item.first != item.second)
{
@ -447,40 +430,74 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
node.column = child->column;
node.children.emplace_back(child);
auto & alias = addNode(std::move(node), true);
result_nodes.push_back(&alias);
child = &addNode(std::move(node));
}
auto it = names_map.find(child->result_name);
if (it == names_map.end())
{
names_map[child->result_name] = index.size();
index.push_back(child);
}
else
result_nodes.push_back(child);
index[it->second] = child;
}
}
void ActionsDAG::addAliases(const NamesWithAliases & aliases)
{
std::vector<Node *> result_nodes;
addAliases(aliases, result_nodes);
}
void ActionsDAG::project(const NamesWithAliases & projection)
{
std::vector<Node *> result_nodes;
addAliases(projection, result_nodes);
removeUnusedActions(result_nodes);
std::unordered_map<std::string_view, const Node *> names_map;
for (const auto * node : index)
names_map.emplace(node->result_name, node);
index.clear();
index.reserve(projection.size());
for (const auto & item : projection)
{
auto it = names_map.find(item.first);
if (it == names_map.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", item.first, dumpNames());
index.push_back(it->second);
}
for (size_t i = 0; i < projection.size(); ++i)
{
const auto & item = projection[i];
auto & child = index[i];
if (!item.second.empty() && item.first != item.second)
{
Node node;
node.type = ActionType::ALIAS;
node.result_type = child->result_type;
node.result_name = std::move(item.second);
node.column = child->column;
node.children.emplace_back(child);
child = &addNode(std::move(node));
}
}
removeUnusedActions();
projectInput();
settings.projected_output = true;
projected_output = true;
}
bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
{
if (index.contains(column_name))
return true;
for (const auto * node : index)
if (node->result_name == column_name)
return true;
for (auto it = nodes.rbegin(); it != nodes.rend(); ++it)
{
auto & node = *it;
if (node.result_name == column_name)
{
index.replace(&node);
index.push_back(&node);
return true;
}
}
@ -502,7 +519,7 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
col = *it;
index.remove(it);
index.erase(it);
}
/// Check if column is in input.
@ -541,7 +558,9 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
ActionsDAGPtr ActionsDAG::clone() const
{
auto actions = cloneEmpty();
auto actions = std::make_shared<ActionsDAG>();
actions->project_input = project_input;
actions->projected_output = projected_output;
std::unordered_map<const Node *, Node *> copy_map;
@ -556,7 +575,7 @@ ActionsDAGPtr ActionsDAG::clone() const
child = copy_map[child];
for (const auto & node : index)
actions->index.insert(copy_map[node]);
actions->index.push_back(copy_map[node]);
for (const auto & node : inputs)
actions->inputs.push_back(copy_map[node]);
@ -564,16 +583,13 @@ ActionsDAGPtr ActionsDAG::clone() const
return actions;
}
void ActionsDAG::compileExpressions()
{
#if USE_EMBEDDED_COMPILER
if (settings.compile_expressions)
{
compileFunctions();
removeUnusedActions();
}
#endif
void ActionsDAG::compileExpressions(size_t min_count_to_compile_expression)
{
compileFunctions(min_count_to_compile_expression);
removeUnusedActions();
}
#endif
std::string ActionsDAG::dumpDAG() const
{
@ -665,23 +681,21 @@ bool ActionsDAG::trivial() const
}
void ActionsDAG::addMaterializingOutputActions()
{
for (auto & node : index)
node = &materializeNode(*node);
}
const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node)
{
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
Index new_index;
std::vector<Node *> index_nodes(index.begin(), index.end());
for (auto * node : index_nodes)
{
auto & name = node->result_name;
node = &addFunction(func_builder_materialize, {node}, {}, true, false);
node = &addAlias(*node, name, true);
new_index.insert(node);
}
index.swap(new_index);
const auto & name = node.result_name;
const auto * func = &addFunction(func_builder_materialize, {&node}, {});
return addAlias(*func, name);
}
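
materializeNode wraps a node in a materialize() call and then aliases the result back to the original name, so consumers still see the same column. A sketch of that wrap-then-rename shape on a toy expression tree (the node kinds here are illustrative, not the real ActionsDAG):

#include <deque>
#include <iostream>
#include <string>
#include <vector>

struct Node
{
    std::string kind;         // "input", "function", or "alias"
    std::string result_name;
    std::vector<const Node *> children;
};

// Owning container; std::deque keeps node addresses stable.
std::deque<Node> nodes;

const Node & addFunction(const std::string & name, std::vector<const Node *> children)
{
    return nodes.emplace_back(Node{"function", name, std::move(children)});
}

const Node & addAlias(const Node & child, std::string alias)
{
    return nodes.emplace_back(Node{"alias", std::move(alias), {&child}});
}

// Wrap `node` in materialize() and restore its original result name.
const Node & materializeNode(const Node & node)
{
    const auto & name = node.result_name;
    const auto & func = addFunction("materialize(" + name + ")", {&node});
    return addAlias(func, name);
}

int main()
{
    const auto & input = nodes.emplace_back(Node{"input", "x", {}});
    const auto & materialized = materializeNode(input);
    std::cout << materialized.kind << " -> " << materialized.result_name << '\n'; // alias -> x
}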
ActionsDAGPtr ActionsDAG::makeConvertingActions(
@ -702,7 +716,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
throw Exception("Converting with add_casted_columns supported only for MatchColumnsMode::Name", ErrorCodes::LOGICAL_ERROR);
auto actions_dag = std::make_shared<ActionsDAG>(source);
std::vector<Node *> projection(num_result_columns);
NodeRawConstPtrs projection(num_result_columns);
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
@ -719,8 +733,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
{
const auto & res_elem = result[result_col_num];
Node * src_node = nullptr;
Node * dst_node = nullptr;
const Node * src_node = nullptr;
const Node * dst_node = nullptr;
switch (mode)
{
@ -749,7 +763,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
if (const auto * src_const = typeid_cast<const ColumnConst *>(dst_node->column.get()))
{
if (ignore_constant_values)
dst_node = const_cast<Node *>(&actions_dag->addColumn(res_elem, true));
dst_node = &actions_dag->addColumn(res_elem);
else if (res_const->getField() != src_const->getField())
throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because "
"it is constant but values of constants are different in source and result",
@ -769,22 +783,22 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
column.column = DataTypeString().createColumnConst(0, column.name);
column.type = std::make_shared<DataTypeString>();
auto * right_arg = const_cast<Node *>(&actions_dag->addColumn(std::move(column), true));
auto * left_arg = dst_node;
const auto * right_arg = &actions_dag->addColumn(std::move(column));
const auto * left_arg = dst_node;
FunctionCast::Diagnostic diagnostic = {dst_node->result_name, res_elem.name};
FunctionOverloadResolverPtr func_builder_cast =
std::make_shared<FunctionOverloadResolverAdaptor>(
CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic)));
Inputs children = { left_arg, right_arg };
dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true);
NodeRawConstPtrs children = { left_arg, right_arg };
dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {});
}
if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))
{
Inputs children = {dst_node};
dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}, true);
NodeRawConstPtrs children = {dst_node};
dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {});
}
if (dst_node->result_name != res_elem.name)
@ -805,7 +819,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
}
else
{
dst_node = &actions_dag->addAlias(*dst_node, res_elem.name, true);
dst_node = &actions_dag->addAlias(*dst_node, res_elem.name);
projection[result_col_num] = dst_node;
}
}
@ -815,7 +829,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
}
}
actions_dag->removeUnusedActions(projection);
actions_dag->index.swap(projection);
actions_dag->removeUnusedActions();
actions_dag->projectInput();
return actions_dag;
@ -830,11 +845,12 @@ ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
std::make_shared<FunctionMaterialize>()));
auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
adding_column_action->addAlias(function_node, std::move(column_name), true);
const auto * column_node = &adding_column_action->addColumn(std::move(column));
NodeRawConstPtrs inputs = {column_node};
const auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {});
const auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name));
adding_column_action->index.push_back(&alias_node);
return adding_column_action;
}
@ -848,23 +864,23 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
/// This map contains nodes which should be removed from the `first` index, because they are used as inputs for `second`.
/// The second element is the number of removals (because one node may be repeated several times in the result).
std::unordered_map<Node *, size_t> removed_first_result;
std::unordered_map<const Node *, size_t> removed_first_result;
/// Map inputs of `second` to nodes of `first`.
std::unordered_map<Node *, Node *> inputs_map;
std::unordered_map<const Node *, const Node *> inputs_map;
/// Update inputs list.
{
/// The index may have multiple columns with the same name. They may also be used by `second`. Order is important.
std::unordered_map<std::string_view, std::list<Node *>> first_result;
for (auto & node : first.index)
std::unordered_map<std::string_view, std::list<const Node *>> first_result;
for (const auto & node : first.index)
first_result[node->result_name].push_back(node);
for (auto & node : second.inputs)
for (const auto & node : second.inputs)
{
auto it = first_result.find(node->result_name);
if (it == first_result.end() || it->second.empty())
{
if (first.settings.project_input)
if (first.project_input)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot find column {} in ActionsDAG result", node->result_name);
@ -904,50 +920,29 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
}
/// Update index.
if (second.settings.project_input)
if (second.project_input)
{
first.index.swap(second.index);
first.settings.project_input = true;
first.project_input = true;
}
else
{
/// Remove `second` inputs from index.
for (auto it = first.index.begin(); it != first.index.end();)
/// Add results from the first actions that were not removed.
for (const auto * node : first.index)
{
auto cur = it;
++it;
auto jt = removed_first_result.find(*cur);
if (jt != removed_first_result.end() && jt->second > 0)
{
first.index.remove(cur);
--jt->second;
}
auto it = removed_first_result.find(node);
if (it != removed_first_result.end() && it->second > 0)
--it->second;
else
second.index.push_back(node);
}
for (auto it = second.index.rbegin(); it != second.index.rend(); ++it)
first.index.prepend(*it);
first.index.swap(second.index);
}
first.nodes.splice(first.nodes.end(), std::move(second.nodes));
/// Here we rebuild the index because some string_views from the first map may now point to strings from the second.
ActionsDAG::Index first_index;
for (auto * node : first.index)
first_index.insert(node);
first.index.swap(first_index);
#if USE_EMBEDDED_COMPILER
if (first.compilation_cache == nullptr)
first.compilation_cache = second.compilation_cache;
#endif
first.settings.max_temporary_columns = std::max(first.settings.max_temporary_columns, second.settings.max_temporary_columns);
first.settings.max_temporary_non_const_columns = std::max(first.settings.max_temporary_non_const_columns, second.settings.max_temporary_non_const_columns);
first.settings.min_count_to_compile_expression = std::max(first.settings.min_count_to_compile_expression, second.settings.min_count_to_compile_expression);
first.settings.projected_output = second.settings.projected_output;
first.projected_output = second.projected_output;
/// Drop unused inputs and, probably, some actions.
first.removeUnusedActions();
@ -960,13 +955,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// Split DAG into two parts.
/// (first_nodes, first_index) is the part whose result will contain split_list.
/// (second_nodes, second_index) is the part which will have the same index as the current actions.
std::list<Node> second_nodes;
std::list<Node> first_nodes;
Index second_index;
Index first_index;
Nodes second_nodes;
Nodes first_nodes;
NodeRawConstPtrs second_index;
NodeRawConstPtrs first_index;
/// List of nodes from the current actions which are not inputs but will be in the second part.
std::vector<const Node *> new_inputs;
NodeRawConstPtrs new_inputs;
struct Frame
{
@ -1036,7 +1031,7 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// First, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
auto & child_data = data[child];
if (!child_data.visited)
@ -1124,13 +1119,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
}
}
for (auto * node : index)
second_index.insert(data[node].to_second);
for (const auto * node : index)
second_index.push_back(data[node].to_second);
Inputs second_inputs;
Inputs first_inputs;
NodeRawConstPtrs second_inputs;
NodeRawConstPtrs first_inputs;
for (auto * input : inputs)
for (const auto * input : inputs)
{
const auto & cur = data[input];
first_inputs.push_back(cur.to_first);
@ -1140,15 +1135,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
{
const auto & cur = data[input];
second_inputs.push_back(cur.to_second);
first_index.insert(cur.to_first);
first_index.push_back(cur.to_first);
}
auto first_actions = cloneEmpty();
auto first_actions = std::make_shared<ActionsDAG>();
first_actions->nodes.swap(first_nodes);
first_actions->index.swap(first_index);
first_actions->inputs.swap(first_inputs);
auto second_actions = cloneEmpty();
auto second_actions = std::make_shared<ActionsDAG>();
second_actions->nodes.swap(second_nodes);
second_actions->index.swap(second_index);
second_actions->inputs.swap(second_inputs);
@ -1186,7 +1181,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
/// First, visit all children. We depend on ARRAY JOIN if any child does.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0)
{
@ -1220,23 +1215,19 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
auto res = split(split_nodes);
/// Do not remove array joined columns if they are not used.
res.first->settings.project_input = false;
res.first->project_input = false;
return res;
}
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
{
auto it = index.begin();
for (; it != index.end(); ++it)
if ((*it)->result_name == column_name)
break;
if (it == index.end())
const auto * node = tryFindInIndex(column_name);
if (!node)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
column_name, dumpDAG());
std::unordered_set<const Node *> split_nodes = {*it};
std::unordered_set<const Node *> split_nodes = {node};
return split(split_nodes);
}
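With the index now a plain NodeRawConstPtrs vector, tryFindInIndex (used above) is just a linear scan for the first node with a matching result_name; findInIndex is the throwing variant. A standalone sketch of the lookup, under that assumption:

#include <string>
#include <vector>

struct Node { std::string result_name; };
using NodeRawConstPtrs = std::vector<const Node *>;

// First match wins: the index may legitimately contain several nodes
// with the same name, and callers want the first one.
const Node * tryFindInIndex(const NodeRawConstPtrs & index, const std::string & name)
{
    for (const auto * node : index)
        if (node->result_name == name)
            return node;
    return nullptr;
}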
@ -1245,8 +1236,8 @@ namespace
struct ConjunctionNodes
{
std::vector<ActionsDAG::Node *> allowed;
std::vector<ActionsDAG::Node *> rejected;
ActionsDAG::NodeRawConstPtrs allowed;
ActionsDAG::NodeRawConstPtrs rejected;
};
/// Takes a node whose result is a predicate.
@ -1256,19 +1247,19 @@ struct ConjunctionNodes
ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set<const ActionsDAG::Node *> allowed_nodes)
{
ConjunctionNodes conjunction;
std::unordered_set<ActionsDAG::Node *> allowed;
std::unordered_set<ActionsDAG::Node *> rejected;
std::unordered_set<const ActionsDAG::Node *> allowed;
std::unordered_set<const ActionsDAG::Node *> rejected;
struct Frame
{
ActionsDAG::Node * node;
const ActionsDAG::Node * node;
bool is_predicate = false;
size_t next_child_to_visit = 0;
size_t num_allowed_children = 0;
};
std::stack<Frame> stack;
std::unordered_set<ActionsDAG::Node *> visited_nodes;
std::unordered_set<const ActionsDAG::Node *> visited_nodes;
stack.push(Frame{.node = predicate, .is_predicate = true});
visited_nodes.insert(predicate);
@ -1282,7 +1273,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
/// First, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0)
{
@ -1305,7 +1296,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
}
else if (is_conjunction)
{
for (auto * child : cur.node->children)
for (const auto * child : cur.node->children)
{
if (allowed_nodes.count(child))
{
@ -1335,7 +1326,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
return conjunction;
}
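To make the allowed/rejected classification above concrete: for a predicate of the form and(a, b, c), each conjunct whose whole subtree is computable from the allowed nodes can be pushed down, and the rest must stay behind. A deliberately simplified standalone sketch (the real code walks the DAG iteratively and computes allowed_nodes transitively):

#include <string>
#include <unordered_set>
#include <vector>

struct Node
{
    std::string function_name;           // empty if the node is not a function
    std::vector<const Node *> children;
};

struct ConjunctionParts
{
    std::vector<const Node *> allowed;   // conjuncts that can be pushed down
    std::vector<const Node *> rejected;  // conjuncts that must stay
};

ConjunctionParts splitConjunction(const Node * predicate,
                                  const std::unordered_set<const Node *> & allowed_nodes)
{
    ConjunctionParts parts;
    if (predicate->function_name != "and")
    {
        // Not a conjunction: push the whole predicate down or not at all.
        (allowed_nodes.count(predicate) ? parts.allowed : parts.rejected).push_back(predicate);
        return parts;
    }
    for (const auto * child : predicate->children)
        (allowed_nodes.count(child) ? parts.allowed : parts.rejected).push_back(child);
    return parts;
}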
ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::Node *> nodes)
ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPtrs & nodes)
{
ColumnsWithTypeAndName arguments;
arguments.reserve(nodes.size());
@ -1360,21 +1351,20 @@ ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::No
///
/// The resulting actions add a single column with the conjunction result (it is always last in the index).
/// No other columns are added or removed.
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunction, const ColumnsWithTypeAndName & all_inputs)
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs)
{
if (conjunction.empty())
return nullptr;
auto actions = cloneEmpty();
actions->settings.project_input = false;
auto actions = std::make_shared<ActionsDAG>();
FunctionOverloadResolverPtr func_builder_and =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionAnd>()));
std::unordered_map<const ActionsDAG::Node *, ActionsDAG::Node *> nodes_mapping;
std::unordered_map<std::string, std::list<Node *>> required_inputs;
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> nodes_mapping;
std::unordered_map<std::string, std::list<const Node *>> required_inputs;
struct Frame
{
@ -1397,7 +1387,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
/// First, visit all children.
while (cur.next_child_to_visit < cur.node->children.size())
{
auto * child = cur.node->children[cur.next_child_to_visit];
const auto * child = cur.node->children[cur.next_child_to_visit];
if (nodes_mapping.count(child) == 0)
{
@ -1424,14 +1414,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
}
}
/// Actions must have the same inputs as in the all_inputs list.
/// See the comment for cloneActionsForFilterPushDown.
for (const auto & col : all_inputs)
{
Node * input;
const Node * input;
auto & list = required_inputs[col.name];
if (list.empty())
input = &const_cast<Node &>(actions->addInput(col, true, false));
input = &actions->addInput(col);
else
{
input = list.front();
@ -1439,22 +1427,22 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
actions->inputs.push_back(input);
}
actions->index.insert(input);
actions->index.push_back(input);
}
Node * result_predicate = nodes_mapping[*conjunction.begin()];
const Node * result_predicate = nodes_mapping[*conjunction.begin()];
if (conjunction.size() > 1)
{
std::vector<Node *> args;
NodeRawConstPtrs args;
args.reserve(conjunction.size());
for (const auto * predicate : conjunction)
args.emplace_back(nodes_mapping[predicate]);
result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false);
result_predicate = &actions->addFunction(func_builder_and, std::move(args), {});
}
actions->index.insert(result_predicate);
actions->index.push_back(result_predicate);
return actions;
}
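The nodes_mapping table above is the standard trick for copying a subgraph of a DAG: clone each node once, and remap child pointers through a map keyed by the original nodes, so shared subexpressions stay shared. A standalone sketch (recursive for brevity; the real code uses an explicit stack to avoid deep recursion):

#include <list>
#include <unordered_map>
#include <vector>

struct Node { std::vector<const Node *> children; };

const Node * cloneSubgraph(const Node * root,
                           std::list<Node> & new_nodes, // destination arena; std::list keeps pointers stable
                           std::unordered_map<const Node *, const Node *> & mapping)
{
    if (auto it = mapping.find(root); it != mapping.end())
        return it->second; // already cloned: reuse it, so the copy stays a DAG, not a tree

    Node copy;
    for (const auto * child : root->children)
        copy.children.push_back(cloneSubgraph(child, new_nodes, mapping));

    const Node * result = &new_nodes.emplace_back(std::move(copy));
    mapping[root] = result;
    return result;
}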
@ -1464,22 +1452,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
const Names & available_inputs,
const ColumnsWithTypeAndName & all_inputs)
{
Node * predicate;
{
auto it = index.begin();
for (; it != index.end(); ++it)
if ((*it)->result_name == filter_name)
break;
if (it == index.end())
Node * predicate = const_cast<Node *>(tryFindInIndex(filter_name));
if (!predicate)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
filter_name, dumpDAG());
predicate = *it;
}
std::unordered_set<const Node *> allowed_nodes;
/// Get input nodes from available_inputs names.
@ -1516,7 +1494,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
{
if (*i == predicate)
{
index.remove(i);
index.erase(i);
break;
}
}
@ -1539,7 +1517,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
/// The predicate is a conjunction where both the allowed and rejected sets are non-empty.
/// Replace this node with a conjunction of the rejected predicates.
std::vector<Node *> new_children(conjunction.rejected.begin(), conjunction.rejected.end());
NodeRawConstPtrs new_children = std::move(conjunction.rejected);
if (new_children.size() == 1)
{
@ -1564,8 +1542,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
node.column = DataTypeString().createColumnConst(0, node.result_name);
node.result_type = std::make_shared<DataTypeString>();
auto * right_arg = &nodes.emplace_back(std::move(node));
auto * left_arg = new_children.front();
const auto * right_arg = &nodes.emplace_back(std::move(node));
const auto * left_arg = new_children.front();
predicate->children = {left_arg, right_arg};
auto arguments = prepareFunctionArguments(predicate->children);

View File

@ -26,7 +26,6 @@ using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class Context;
class CompiledExpressionCache;
/// Directed acyclic graph of expressions.
@ -55,9 +54,13 @@ public:
FUNCTION,
};
struct Node;
using NodeRawPtrs = std::vector<Node *>;
using NodeRawConstPtrs = std::vector<const Node *>;
struct Node
{
std::vector<Node *> children;
NodeRawConstPtrs children;
ActionType type;
@ -79,128 +82,18 @@ public:
bool allow_constant_folding = true;
};
/// Index is used to:
/// * find a Node by its result_name
/// * specify the order of columns in the result
/// It represents the set of available columns.
/// Removing a column from the index is equivalent to removing it from the final result.
///
/// The DAG allows actions with duplicate result names. In this case the index will point to the last added Node.
/// This causes no problems as long as the execution of actions no longer depends on action names.
///
/// Index is a list of nodes + [map: name -> list::iterator].
/// The list is ordered and may contain nodes with the same name, or one node several times.
class Index
{
private:
std::list<Node *> list;
/// The map key is a string_view into Node::result_name of the node stored as the value.
/// The map always points to an existing node, so keys are always valid (nodes live longer than the index).
std::unordered_map<std::string_view, std::list<Node *>::iterator> map;
public:
auto size() const { return list.size(); }
bool contains(std::string_view key) const { return map.count(key) != 0; }
std::list<Node *>::iterator begin() { return list.begin(); }
std::list<Node *>::iterator end() { return list.end(); }
std::list<Node *>::const_iterator begin() const { return list.begin(); }
std::list<Node *>::const_iterator end() const { return list.end(); }
std::list<Node *>::const_reverse_iterator rbegin() const { return list.rbegin(); }
std::list<Node *>::const_reverse_iterator rend() const { return list.rend(); }
std::list<Node *>::const_iterator find(std::string_view key) const
{
auto it = map.find(key);
if (it == map.end())
return list.end();
return it->second;
}
/// The insert method doesn't check whether the map already has a node with the same name.
/// If a node with the same name exists, it is removed from the map, but not from the list.
/// This is expected and used by project(), whose result may have several columns with the same name.
void insert(Node * node)
{
auto it = list.emplace(list.end(), node);
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
handle.mapped() = it;
map.insert(std::move(handle));
}
else
map[node->result_name] = it;
}
void prepend(Node * node)
{
auto it = list.emplace(list.begin(), node);
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
handle.mapped() = it;
map.insert(std::move(handle));
}
else
map[node->result_name] = it;
}
/// If a node with the same name exists in the index, replace it. Otherwise insert the new node into the index.
void replace(Node * node)
{
if (auto handle = map.extract(node->result_name))
{
handle.key() = node->result_name; /// Change string_view
*handle.mapped() = node;
map.insert(std::move(handle));
}
else
insert(node);
}
void remove(std::list<Node *>::iterator it)
{
auto map_it = map.find((*it)->result_name);
if (map_it != map.end() && map_it->second == it)
map.erase(map_it);
list.erase(it);
}
void swap(Index & other)
{
list.swap(other.list);
map.swap(other.map);
}
};
/// NOTE: std::list is an implementation detail.
/// It allows adding and removing nodes in place, without reallocation.
/// Raw pointers to nodes remain valid.
using Nodes = std::list<Node>;
using Inputs = std::vector<Node *>;
struct ActionsSettings
{
size_t max_temporary_columns = 0;
size_t max_temporary_non_const_columns = 0;
size_t min_count_to_compile_expression = 0;
bool compile_expressions = false;
bool project_input = false;
bool projected_output = false;
};
private:
Nodes nodes;
Index index;
Inputs inputs;
NodeRawConstPtrs index;
NodeRawConstPtrs inputs;
ActionsSettings settings;
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compilation_cache;
#endif
bool project_input = false;
bool projected_output = false;
public:
ActionsDAG() = default;
@ -211,8 +104,8 @@ public:
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
const Nodes & getNodes() const { return nodes; }
const Index & getIndex() const { return index; }
const Inputs & getInputs() const { return inputs; }
const NodeRawConstPtrs & getIndex() const { return index; }
const NodeRawConstPtrs & getInputs() const { return inputs; }
NamesAndTypesList getRequiredColumns() const;
ColumnsWithTypeAndName getResultColumns() const;
@ -222,19 +115,26 @@ public:
std::string dumpNames() const;
std::string dumpDAG() const;
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false, bool add_to_index = true);
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false, bool add_to_index = true);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
const Node & addInput(std::string name, DataTypePtr type);
const Node & addInput(ColumnWithTypeAndName column);
const Node & addColumn(ColumnWithTypeAndName column);
const Node & addAlias(const Node & child, std::string alias);
const Node & addArrayJoin(const Node & child, std::string result_name);
const Node & addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context,
bool can_replace = false);
NodeRawConstPtrs children,
std::string result_name);
void addNodeToIndex(const Node * node) { index.insert(const_cast<Node *>(node)); }
/// The index can contain any column returned by the DAG.
/// You may change it manually if needed.
NodeRawConstPtrs & getIndex() { return index; }
/// Find the first column by name in the index. The search is linear.
const Node & findInIndex(const std::string & name) const;
/// Same, but returns nullptr if the node is not found.
const Node * tryFindInIndex(const std::string & name) const;
/// Find the first node with the same name in the index and replace it.
/// If it was not found, append the node to the end of the index.
void addOrReplaceInIndex(const Node & node);
/// Call addAlias several times.
void addAliases(const NamesWithAliases & aliases);
@ -248,16 +148,20 @@ public:
/// Return true if column was removed from inputs.
bool removeUnusedResult(const std::string & column_name);
void projectInput(bool project = true) { settings.project_input = project; }
void projectInput(bool project = true) { project_input = project; }
bool isInputProjected() const { return project_input; }
bool isOutputProjected() const { return projected_output; }
void removeUnusedActions(const Names & required_names);
void removeUnusedActions(const NameSet & required_names);
bool hasArrayJoin() const;
bool hasStatefulFunctions() const;
bool trivial() const; /// If actions has no functions or array join.
const ActionsSettings & getSettings() const { return settings; }
void compileExpressions();
#if USE_EMBEDDED_COMPILER
void compileExpressions(size_t min_count_to_compile_expression);
#endif
ActionsDAGPtr clone() const;
@ -265,6 +169,9 @@ public:
/// Also add aliases so the result names remain unchanged.
void addMaterializingOutputActions();
/// Apply materialize() function to node. Result node has the same name.
const Node & materializeNode(const Node & node);
enum class MatchColumnsMode
{
/// Require the same number of columns in source and result. Match columns by their positions, regardless of names.
@ -334,36 +241,15 @@ public:
const ColumnsWithTypeAndName & all_inputs);
private:
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
Node & getNode(const std::string & name);
Node & addNode(Node node);
Node & addAlias(Node & child, std::string alias, bool can_replace);
Node & addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace,
bool add_to_index = true);
ActionsDAGPtr cloneEmpty() const
{
auto actions = std::make_shared<ActionsDAG>();
actions->settings = settings;
void removeUnusedActions(bool allow_remove_inputs = true);
#if USE_EMBEDDED_COMPILER
actions->compilation_cache = compilation_cache;
void compileFunctions(size_t min_count_to_compile_expression);
#endif
return actions;
}
void removeUnusedActions(const std::vector<Node *> & required_nodes);
void removeUnusedActions(bool allow_remove_inputs = true);
void addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes);
void compileFunctions();
ActionsDAGPtr cloneActionsForConjunction(std::vector<Node *> conjunction, const ColumnsWithTypeAndName & all_inputs);
static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs);
};
}
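The NOTE about std::list in the class above is worth demonstrating: because std::list never relocates its elements, all the raw Node pointers held in children, index and inputs survive arbitrary growth of the nodes arena. A runnable standalone illustration:

#include <cassert>
#include <list>

struct Node { int payload = 0; };

int main()
{
    std::list<Node> nodes;
    const Node * first = &nodes.emplace_back(Node{1});

    // With std::vector, `first` could dangle after the container grows;
    // with std::list, it is guaranteed to stay valid.
    for (int i = 0; i < 1000; ++i)
        nodes.emplace_back(Node{i});

    assert(first->payload == 1);
    return 0;
}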

View File

@ -52,6 +52,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int INCORRECT_ELEMENT_OF_SET;
extern const int BAD_ARGUMENTS;
extern const int DUPLICATE_COLUMN;
}
static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
@ -346,11 +347,9 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
const auto & index = actions.getIndex();
auto it = index.find(left_arg->getColumnName());
if (it == index.end())
throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
const DataTypePtr & left_arg_type = (*it)->result_type;
auto column_name = left_arg->getColumnName();
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
DataTypes set_element_types = {left_arg_type};
const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
@ -381,6 +380,54 @@ SetPtr makeExplicitSet(
return set;
}
ScopeStack::Level::~Level() = default;
ScopeStack::Level::Level() = default;
ScopeStack::Level::Level(Level &&) = default;
class ScopeStack::Index
{
/// Map column name -> Node.
/// Uses string_view as the key; it always points into Node::result_name.
std::unordered_map<std::string_view, const ActionsDAG::Node *> map;
ActionsDAG::NodeRawConstPtrs & index;
public:
explicit Index(ActionsDAG::NodeRawConstPtrs & index_) : index(index_)
{
for (const auto * node : index)
map.emplace(node->result_name, node);
}
void addNode(const ActionsDAG::Node * node)
{
bool inserted = map.emplace(node->result_name, node).second;
if (!inserted)
throw Exception("Column '" + node->result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
index.push_back(node);
}
const ActionsDAG::Node * tryGetNode(const std::string & name) const
{
auto it = map.find(name);
if (it == map.end())
return nullptr;
return it->second;
}
const ActionsDAG::Node & getNode(const std::string & name) const
{
const auto * node = tryGetNode(name);
if (!node)
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return *node;
}
bool contains(const std::string & name) const { return map.count(name) > 0; }
};
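The string_view keys of ScopeStack::Index rely on the lifetime contract spelled out in the comment above: each key views the Node's own result_name, so the map is valid only while the nodes are alive and their names unchanged. A runnable standalone illustration of that pattern:

#include <list>
#include <string>
#include <string_view>
#include <unordered_map>

struct Node { std::string result_name; };

int main()
{
    std::list<Node> nodes; // owns the strings; element addresses are stable
    std::unordered_map<std::string_view, const Node *> map;

    const Node & n = nodes.emplace_back(Node{"x"});
    map.emplace(n.result_name, &n); // the key views the node's own name, no copy

    return map.count("x") == 1 ? 0 : 1;
}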
ActionsMatcher::Data::Data(
const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
const NamesAndTypesList & source_columns_, ActionsDAGPtr actions_dag,
@ -404,7 +451,7 @@ ActionsMatcher::Data::Data(
bool ActionsMatcher::Data::hasColumn(const String & column_name) const
{
return actions_stack.getLastActions().getIndex().contains(column_name);
return actions_stack.getLastActionsIndex().contains(column_name);
}
ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, const Context & context_)
@ -412,6 +459,7 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, const Context & context_)
{
auto & level = stack.emplace_back();
level.actions_dag = std::move(actions_dag);
level.index = std::make_unique<ScopeStack::Index>(level.actions_dag->getIndex());
for (const auto & node : level.actions_dag->getIndex())
if (node->type == ActionsDAG::ActionType::INPUT)
@ -422,20 +470,23 @@ void ScopeStack::pushLevel(const NamesAndTypesList & input_columns)
{
auto & level = stack.emplace_back();
level.actions_dag = std::make_shared<ActionsDAG>();
level.index = std::make_unique<ScopeStack::Index>(level.actions_dag->getIndex());
const auto & prev = stack[stack.size() - 2];
for (const auto & input_column : input_columns)
{
level.actions_dag->addInput(input_column.name, input_column.type);
const auto & node = level.actions_dag->addInput(input_column.name, input_column.type);
level.index->addNode(&node);
level.inputs.emplace(input_column.name);
}
const auto & index = level.actions_dag->getIndex();
for (const auto & node : prev.actions_dag->getIndex())
{
if (!index.contains(node->result_name))
level.actions_dag->addInput({node->column, node->result_type, node->result_name});
if (!level.index->contains(node->result_name))
{
const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name});
level.index->addNode(&input);
}
}
}
@ -448,10 +499,8 @@ size_t ScopeStack::getColumnLevel(const std::string & name)
if (stack[i].inputs.count(name))
return i;
const auto & index = stack[i].actions_dag->getIndex();
auto it = index.find(name);
if (it != index.end() && (*it)->type != ActionsDAG::ActionType::INPUT)
const auto * node = stack[i].index->tryGetNode(name);
if (node && node->type != ActionsDAG::ActionType::INPUT)
return i;
}
@ -461,32 +510,46 @@ size_t ScopeStack::getColumnLevel(const std::string & name)
void ScopeStack::addColumn(ColumnWithTypeAndName column)
{
const auto & node = stack[0].actions_dag->addColumn(std::move(column));
stack[0].index->addNode(&node);
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addAlias(const std::string & name, std::string alias)
{
auto level = getColumnLevel(name);
const auto & node = stack[level].actions_dag->addAlias(name, std::move(alias));
const auto & source = stack[level].index->getNode(name);
const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias));
stack[level].index->addNode(&node);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name)
{
getColumnLevel(source_name);
if (!stack.front().actions_dag->getIndex().contains(source_name))
const auto * source_node = stack.front().index->tryGetNode(source_name);
if (!source_node)
throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name,
ErrorCodes::BAD_ARGUMENTS);
const auto & node = stack.front().actions_dag->addArrayJoin(source_name, std::move(result_name));
const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name));
stack.front().index->addNode(&node);
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
void ScopeStack::addFunction(
@ -498,17 +561,26 @@ void ScopeStack::addFunction(
for (const auto & argument : argument_names)
level = std::max(level, getColumnLevel(argument));
const auto & node = stack[level].actions_dag->addFunction(function, argument_names, std::move(result_name), context);
ActionsDAG::NodeRawConstPtrs children;
children.reserve(argument_names.size());
for (const auto & argument : argument_names)
children.push_back(&stack[level].index->getNode(argument));
const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name));
stack[level].index->addNode(&node);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
{
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
stack[j].index->addNode(&input);
}
}
ActionsDAGPtr ScopeStack::popLevel()
{
auto res = std::move(stack.back());
auto res = std::move(stack.back().actions_dag);
stack.pop_back();
return res.actions_dag;
return res;
}
std::string ScopeStack::dumpNames() const
@ -521,6 +593,11 @@ const ActionsDAG & ScopeStack::getLastActions() const
return *stack.back().actions_dag;
}
const ScopeStack::Index & ScopeStack::getLastActionsIndex() const
{
return *stack.back().index;
}
bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
/// Visit children themselves
@ -568,10 +645,9 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
child_column_name = as_literal->unique_column_name;
}
const auto & index = data.actions_stack.getLastActions().getIndex();
auto it = index.find(child_column_name);
if (it != index.end())
return NameAndTypePair(child_column_name, (*it)->result_type);
const auto & index = data.actions_stack.getLastActionsIndex();
if (const auto * node = index.tryGetNode(child_column_name))
return NameAndTypePair(child_column_name, node->result_type);
if (!data.only_consts)
throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(),
@ -927,7 +1003,9 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(lambda_dag);
auto lambda_actions = std::make_shared<ExpressionActions>(
lambda_dag,
ExpressionActionsSettings::fromContext(data.context));
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
@ -983,12 +1061,8 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
const auto & index = data.actions_stack.getLastActions().getIndex();
const ActionsDAG::Node * existing_column = nullptr;
auto it = index.find(default_name);
if (it != index.end())
existing_column = *it;
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
/*
* To approximate CSE, bind all identical literals to a single temporary
@ -1101,7 +1175,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
else
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = last_actions.getIndex();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName()))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets);

View File

@ -62,10 +62,18 @@ Block createBlockForSet(
*/
struct ScopeStack
{
class Index;
using IndexPtr = std::unique_ptr<Index>;
struct Level
{
ActionsDAGPtr actions_dag;
IndexPtr index;
NameSet inputs;
Level();
Level(Level &&);
~Level();
};
using Levels = std::vector<Level>;
@ -91,6 +99,7 @@ struct ScopeStack
ActionsDAGPtr popLevel();
const ActionsDAG & getLastActions() const;
const Index & getLastActionsIndex() const;
std::string dumpNames() const;
};

View File

@ -188,7 +188,7 @@ void AsynchronousMetrics::update()
#if USE_EMBEDDED_COMPILER
{
if (auto compiled_expression_cache = global_context.getCompiledExpressionCache())
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
new_values["CompiledExpressionCacheCount"] = compiled_expression_cache->count();
}
#endif

View File

@ -15,6 +15,8 @@
#include <Processors/Sources/DelayedSource.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
namespace ProfileEvents
@ -284,7 +286,9 @@ void SelectStreamFactory::createForShard(
if (try_results.empty() || local_delay < max_remote_delay)
{
auto plan = createLocalPlan(modified_query_ast, header, context, stage);
return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()))));
return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context_ptr),
BuildQueryPipelineSettings::fromContext(*context_ptr))));
}
else
{

View File

@ -380,10 +380,6 @@ struct ContextShared
ConfigurationPtr clusters_config; /// Stores updated configs
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compiled_expression_cache;
#endif
bool shutdown_called = false;
Stopwatch uptime_watch;
@ -2327,35 +2323,6 @@ void Context::setQueryParameter(const String & name, const String & value)
}
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> Context::getCompiledExpressionCache() const
{
auto lock = getLock();
return shared->compiled_expression_cache;
}
void Context::setCompiledExpressionCache(size_t cache_size)
{
auto lock = getLock();
if (shared->compiled_expression_cache)
throw Exception("Compiled expressions cache has been already created.", ErrorCodes::LOGICAL_ERROR);
shared->compiled_expression_cache = std::make_shared<CompiledExpressionCache>(cache_size);
}
void Context::dropCompiledExpressionCache() const
{
auto lock = getLock();
if (shared->compiled_expression_cache)
shared->compiled_expression_cache->reset();
}
#endif
void Context::addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd) const
{
auto lock = getLock();

View File

@ -740,12 +740,6 @@ public:
void setQueryParameter(const String & name, const String & value);
void setQueryParameters(const NameToNameMap & parameters) { query_parameters = parameters; }
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> getCompiledExpressionCache() const;
void setCompiledExpressionCache(size_t cache_size);
void dropCompiledExpressionCache() const;
#endif
/// Add started bridge command. It will be killed after context destruction
void addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd) const;

View File

@ -910,31 +910,6 @@ String DatabaseCatalog::getPathForUUID(const UUID & uuid)
return toString(uuid).substr(0, uuid_prefix_len) + '/' + toString(uuid) + '/';
}
String DatabaseCatalog::resolveDictionaryName(const String & name) const
{
/// If it's a dictionary from an Atomic database, we need to convert the qualified name to a UUID.
/// Try to split the name and get the id from the associated StorageDictionary.
/// If something went wrong, return the name as is.
/// TODO support dot in name for dictionaries in Atomic databases
auto pos = name.find('.');
if (pos == std::string::npos || name.find('.', pos + 1) != std::string::npos)
return name;
String maybe_database_name = name.substr(0, pos);
String maybe_table_name = name.substr(pos + 1);
auto db_and_table = tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, global_context);
if (!db_and_table.first)
return name;
assert(db_and_table.second);
if (db_and_table.first->getUUID() == UUIDHelpers::Nil)
return name;
if (db_and_table.second->getName() != "Dictionary")
return name;
return toString(db_and_table.second->getStorageID().uuid);
}
void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid)
{
if (uuid == UUIDHelpers::Nil)

View File

@ -192,9 +192,6 @@ public:
String getPathForDroppedMetadata(const StorageID & table_id) const;
void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false);
/// Try convert qualified dictionary name to persistent UUID
String resolveDictionaryName(const String & name) const;
void waitTableFinallyDropped(const UUID & uuid);
private:

View File

@ -44,16 +44,18 @@ namespace ErrorCodes
ExpressionActions::~ExpressionActions() = default;
ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_)
ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_)
: settings(settings_)
{
actions_dag = actions_dag_->clone();
actions_dag->compileExpressions();
#if USE_EMBEDDED_COMPILER
if (settings.compile_expressions)
actions_dag->compileExpressions(settings.min_count_to_compile_expression);
#endif
linearizeActions();
const auto & settings = actions_dag->getSettings();
if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns)
throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS,
"Too many temporary columns: {}. Maximum: {}",
@ -141,7 +143,7 @@ void ExpressionActions::linearizeActions()
ExpressionActions::Arguments arguments;
arguments.reserve(cur.node->children.size());
for (auto * child : cur.node->children)
for (const auto * child : cur.node->children)
{
auto & arg = data[reverse_index[child]];
@ -258,15 +260,14 @@ std::string ExpressionActions::Action::toString() const
void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) const
{
auto max_temporary_non_const_columns = actions_dag->getSettings().max_temporary_non_const_columns;
if (max_temporary_non_const_columns)
if (settings.max_temporary_non_const_columns)
{
size_t non_const_columns = 0;
for (const auto & column : columns)
if (column.column && !isColumnConst(*column.column))
++non_const_columns;
if (non_const_columns > max_temporary_non_const_columns)
if (non_const_columns > settings.max_temporary_non_const_columns)
{
WriteBufferFromOwnString list_of_non_const_columns;
for (const auto & column : columns)
@ -274,7 +275,7 @@ void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) cons
list_of_non_const_columns << "\n" << column.name;
throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str()
+ ". Maximum: " + std::to_string(max_temporary_non_const_columns),
+ ". Maximum: " + std::to_string(settings.max_temporary_non_const_columns),
ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS);
}
}
@ -460,7 +461,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run)
}
}
if (actions_dag->getSettings().project_input)
if (actions_dag->isInputProjected())
{
block.clear();
}
@ -554,7 +555,7 @@ std::string ExpressionActions::dumpActions() const
for (const auto & output_column : output_columns)
ss << output_column.name << " " << output_column.type->getName() << "\n";
ss << "\nproject input: " << actions_dag->getSettings().project_input << "\noutput positions:";
ss << "\nproject input: " << actions_dag->isInputProjected() << "\noutput positions:";
for (auto pos : result_positions)
ss << " " << pos;
ss << "\n";
@ -621,11 +622,10 @@ void ExpressionActionsChain::finalize()
/// Finalize all steps, right to left, to determine the unnecessary input columns.
for (int i = static_cast<int>(steps.size()) - 1; i >= 0; --i)
{
Names required_output = steps[i]->required_output;
std::unordered_map<String, size_t> required_output_indexes;
for (size_t j = 0; j < required_output.size(); ++j)
required_output_indexes[required_output[j]] = j;
auto & can_remove_required_output = steps[i]->can_remove_required_output;
auto & required_output = steps[i]->required_output;
NameSet required_names;
for (const auto & output : required_output)
required_names.insert(output.first);
if (i + 1 < static_cast<int>(steps.size()))
{
@ -634,15 +634,15 @@ void ExpressionActionsChain::finalize()
{
if (additional_input.count(it.name) == 0)
{
auto iter = required_output_indexes.find(it.name);
if (iter == required_output_indexes.end())
required_output.push_back(it.name);
else if (!can_remove_required_output.empty())
can_remove_required_output[iter->second] = false;
auto iter = required_output.find(it.name);
if (iter == required_output.end())
required_names.insert(it.name);
else
iter->second = false;
}
}
}
steps[i]->finalize(required_output);
steps[i]->finalize(required_names);
}
/// Add the removal of unnecessary columns to the beginning of each step.
@ -666,8 +666,8 @@ std::string ExpressionActionsChain::dumpChain() const
{
ss << "step " << i << "\n";
ss << "required output:\n";
for (const std::string & name : steps[i]->required_output)
ss << name << "\n";
for (const auto & it : steps[i]->required_output)
ss << it.first << "\n";
ss << "\n" << steps[i]->dump() << "\n";
}
@ -693,20 +693,19 @@ ExpressionActionsChain::ArrayJoinStep::ArrayJoinStep(ArrayJoinActionPtr array_jo
}
}
void ExpressionActionsChain::ArrayJoinStep::finalize(const Names & required_output_)
void ExpressionActionsChain::ArrayJoinStep::finalize(const NameSet & required_output_)
{
NamesAndTypesList new_required_columns;
ColumnsWithTypeAndName new_result_columns;
NameSet names(required_output_.begin(), required_output_.end());
for (const auto & column : result_columns)
{
if (array_join->columns.count(column.name) != 0 || names.count(column.name) != 0)
if (array_join->columns.count(column.name) != 0 || required_output_.count(column.name) != 0)
new_result_columns.emplace_back(column);
}
for (const auto & column : required_columns)
{
if (array_join->columns.count(column.name) != 0 || names.count(column.name) != 0)
if (array_join->columns.count(column.name) != 0 || required_output_.count(column.name) != 0)
new_required_columns.emplace_back(column);
}
@ -729,14 +728,14 @@ ExpressionActionsChain::JoinStep::JoinStep(
analyzed_join->addJoinedColumnsAndCorrectTypes(result_columns);
}
void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_)
void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_)
{
/// We need to update required and result columns by removing unused ones.
NamesAndTypesList new_required_columns;
ColumnsWithTypeAndName new_result_columns;
/// These are the input columns we need.
NameSet required_names(required_output_.begin(), required_output_.end());
NameSet required_names = required_output_;
for (const auto & name : analyzed_join->keyNamesLeft())
required_names.emplace(name);

View File

@ -3,6 +3,7 @@
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActionsSettings.h>
#include <variant>
@ -38,7 +39,6 @@ class ExpressionActions
{
public:
using Node = ActionsDAG::Node;
using Index = ActionsDAG::Index;
struct Argument
{
@ -78,10 +78,12 @@ private:
ColumnNumbers result_positions;
Block sample_block;
ExpressionActionsSettings settings;
public:
ExpressionActions() = delete;
~ExpressionActions();
explicit ExpressionActions(ActionsDAGPtr actions_dag_);
explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {});
ExpressionActions(const ExpressionActions &) = default;
ExpressionActions & operator=(const ExpressionActions &) = default;
@ -89,6 +91,7 @@ public:
const std::list<Node> & getNodes() const { return actions_dag->getNodes(); }
const ActionsDAG & getActionsDAG() const { return *actions_dag; }
const ColumnNumbers & getResultPositions() const { return result_positions; }
const ExpressionActionsSettings & getSettings() const { return settings; }
/// Get a list of input columns.
Names getRequiredColumns() const;
@ -138,21 +141,26 @@ struct ExpressionActionsChain
struct Step
{
virtual ~Step() = default;
explicit Step(Names required_output_) : required_output(std::move(required_output_)) {}
explicit Step(Names required_output_)
{
for (const auto & name : required_output_)
required_output[name] = true;
}
/// Columns that were added to the block before the current step, in addition to the previous step's output.
NameSet additional_input;
/// Columns which are required in the result of the current step.
Names required_output;
/// True if column from required_output is needed only for current step and not used in next actions
/// The flag is true if a column from required_output is needed only for the current step and is not used in later actions
/// (and can be removed from the block). Example: the filter column for WHERE actions.
/// If not empty, it has the same size as required_output; it is filled in finalize().
std::vector<bool> can_remove_required_output;
std::unordered_map<std::string, bool> required_output;
void addRequiredOutput(const std::string & name) { required_output[name] = true; }
virtual NamesAndTypesList getRequiredColumns() const = 0;
virtual ColumnsWithTypeAndName getResultColumns() const = 0;
/// Remove unused result and update required columns
virtual void finalize(const Names & required_output_) = 0;
virtual void finalize(const NameSet & required_output_) = 0;
/// Add projections to expression
virtual void prependProjectInput() const = 0;
virtual std::string dump() const = 0;
@ -182,9 +190,9 @@ struct ExpressionActionsChain
return actions_dag->getResultColumns();
}
void finalize(const Names & required_output_) override
void finalize(const NameSet & required_output_) override
{
if (!actions_dag->getSettings().projected_output)
if (!actions_dag->isOutputProjected())
actions_dag->removeUnusedActions(required_output_);
}
@ -209,7 +217,7 @@ struct ExpressionActionsChain
NamesAndTypesList getRequiredColumns() const override { return required_columns; }
ColumnsWithTypeAndName getResultColumns() const override { return result_columns; }
void finalize(const Names & required_output_) override;
void finalize(const NameSet & required_output_) override;
void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ?
std::string dump() const override { return "ARRAY JOIN"; }
};
@ -225,7 +233,7 @@ struct ExpressionActionsChain
JoinStep(std::shared_ptr<TableJoin> analyzed_join_, JoinPtr join_, ColumnsWithTypeAndName required_columns_);
NamesAndTypesList getRequiredColumns() const override { return required_columns; }
ColumnsWithTypeAndName getResultColumns() const override { return result_columns; }
void finalize(const Names & required_output_) override;
void finalize(const NameSet & required_output_) override;
void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ?
std::string dump() const override { return "JOIN"; }
};
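The switch from Names + vector<bool> to a single map from name to a removability flag simplifies finalize(): a later step that needs a column either forbids its removal (if the current step already produces it) or simply requires it. A simplified standalone sketch of that bookkeeping:

#include <string>
#include <unordered_map>
#include <unordered_set>

using RequiredOutput = std::unordered_map<std::string, bool>; // name -> may be removed after this step

void addRequiredOutput(RequiredOutput & required, const std::string & name)
{
    required[name] = true; // removable until a later step proves otherwise
}

// Applied while finalizing steps right to left: the next step's inputs
// must survive the current step.
void markUsedByNextStep(RequiredOutput & required,
                        std::unordered_set<std::string> & required_names,
                        const std::string & name)
{
    if (auto it = required.find(name); it != required.end())
        it->second = false;          // produced here: forbid its removal
    else
        required_names.insert(name); // not produced here: just require it
}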

View File

@ -0,0 +1,24 @@
#include <Interpreters/ExpressionActionsSettings.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
namespace DB
{
ExpressionActionsSettings ExpressionActionsSettings::fromSettings(const Settings & from)
{
ExpressionActionsSettings settings;
settings.compile_expressions = from.compile_expressions;
settings.min_count_to_compile_expression = from.min_count_to_compile_expression;
settings.max_temporary_columns = from.max_temporary_columns;
settings.max_temporary_non_const_columns = from.max_temporary_non_const_columns;
return settings;
}
ExpressionActionsSettings ExpressionActionsSettings::fromContext(const Context & from)
{
return fromSettings(from.getSettingsRef());
}
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <cstddef>
namespace DB
{
struct Settings;
class Context;
struct ExpressionActionsSettings
{
bool compile_expressions = false;
size_t min_count_to_compile_expression = 0;
size_t max_temporary_columns = 0;
size_t max_temporary_non_const_columns = 0;
static ExpressionActionsSettings fromSettings(const Settings & from);
static ExpressionActionsSettings fromContext(const Context & from);
};
}
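For reference, this is how the new settings snapshot is consumed throughout the change (a usage sketch; `actions_dag` and `context` are assumed to exist in the caller's scope):

// Take the snapshot once and hand it to the executor, so ExpressionActions
// no longer needs to reach back into the full Context.
auto expression_actions = std::make_shared<ExpressionActions>(
    actions_dag,
    ExpressionActionsSettings::fromContext(context));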

View File

@ -235,14 +235,10 @@ void ExpressionAnalyzer::analyzeAggregation()
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
const auto & index = temp_actions->getIndex();
auto it = index.find(column_name);
if (it == index.end())
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
const auto & node = *it;
/// Constant expressions have non-null column pointer at this stage.
if (node->column && isColumnConst(*node->column))
{
@ -392,7 +388,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->getIndex().contains(left_in_operand->getColumnName()))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
makeExplicitSet(func, *temp_actions, true, context,
settings.size_limits_for_set, prepared_sets);
}
@ -438,7 +434,8 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (const ASTFunction * node : aggregates())
{
AggregateDescription aggregate;
if (node->arguments) getRootActionsNoMakeSet(node->arguments, true, actions);
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName();
@ -446,20 +443,18 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
const auto & index = actions->getIndex();
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
auto it = index.find(name);
if (it == index.end())
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in aggregate function '{}'",
name, node->formatForErrorMessage());
}
types[i] = (*it)->result_type;
types[i] = dag_node->result_type;
aggregate.argument_names[i] = name;
}
@ -595,20 +590,19 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
= window_function.function_node->arguments->children;
window_function.argument_types.resize(arguments.size());
window_function.argument_names.resize(arguments.size());
const auto & index = actions->getIndex();
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const auto * node = actions->tryFindInIndex(name);
auto it = index.find(name);
if (it == index.end())
if (!node)
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in window function '{}'",
name, window_function.function_node->formatForErrorMessage());
}
window_function.argument_types[i] = (*it)->result_type;
window_function.argument_types[i] = node->result_type;
window_function.argument_names[i] = name;
}
@ -682,7 +676,10 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr
{
/// Assign new names to columns, if needed.
if (result_source.first != result_source.second)
actions->addAlias(result_source.second, result_source.first);
{
const auto & node = actions->findInIndex(result_source.second);
actions->getIndex().push_back(&actions->addAlias(node, result_source.first));
}
/// Make ARRAY JOIN (replace arrays with their elements) for the columns under these new names.
result_columns.insert(result_source.first);
@ -761,8 +758,8 @@ static bool allowDictJoin(StoragePtr joined_storage, const Context & context, St
if (!dict)
return false;
dict_name = dict->resolvedDictionaryName();
auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name);
dict_name = dict->dictionaryName();
auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name, context);
if (!dictionary)
return false;
@ -842,7 +839,9 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(
const ColumnsWithTypeAndName & right_sample_columns = subquery_for_join.sample_block.getColumnsWithTypeAndName();
bool need_convert = syntax->analyzed_join->applyJoinKeyConvert(left_sample_columns, right_sample_columns);
if (need_convert)
subquery_for_join.addJoinActions(std::make_shared<ExpressionActions>(syntax->analyzed_join->rightConvertingActions()));
subquery_for_join.addJoinActions(std::make_shared<ExpressionActions>(
syntax->analyzed_join->rightConvertingActions(),
ExpressionActionsSettings::fromContext(context)));
subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, context);
@ -881,15 +880,10 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName();
step.required_output.push_back(prewhere_column_name);
step.can_remove_required_output.push_back(true);
step.addRequiredOutput(prewhere_column_name);
const auto & index = step.actions()->getIndex();
auto it = index.find(prewhere_column_name);
if (it == index.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", prewhere_column_name);
auto filter_type = (*it)->result_type;
const auto & node = step.actions()->findInIndex(prewhere_column_name);
auto filter_type = node.result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@ -898,8 +892,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
/// Remove unused source_columns from prewhere actions.
auto tmp_actions_dag = std::make_shared<ActionsDAG>(sourceColumns());
getRootActions(select_query->prewhere(), only_types, tmp_actions_dag);
tmp_actions_dag->removeUnusedActions({prewhere_column_name});
auto tmp_actions = std::make_shared<ExpressionActions>(tmp_actions_dag);
tmp_actions_dag->removeUnusedActions(NameSet{prewhere_column_name});
auto tmp_actions = std::make_shared<ExpressionActions>(tmp_actions_dag, ExpressionActionsSettings::fromContext(context));
auto required_columns = tmp_actions->getRequiredColumns();
NameSet required_source_columns(required_columns.begin(), required_columns.end());
required_source_columns.insert(first_action_names.begin(), first_action_names.end());
@ -909,10 +903,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
for (const auto & column : additional_required_columns)
{
if (required_source_columns.count(column))
{
step.required_output.push_back(column);
step.can_remove_required_output.push_back(true);
}
step.addRequiredOutput(column);
}
auto names = step.actions()->getNames();
@ -969,8 +960,7 @@ void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsCha
// FIXME: assert(filter_info);
auto * expression_step = typeid_cast<ExpressionActionsChain::ExpressionActionsStep *>(&step);
expression_step->actions_dag = std::move(actions_dag);
step.required_output.push_back(std::move(column_name));
step.can_remove_required_output = {true};
step.addRequiredOutput(column_name);
chain.addStep();
}
@ -987,15 +977,10 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName();
step.required_output.push_back(where_column_name);
step.can_remove_required_output = {true};
step.addRequiredOutput(where_column_name);
const auto & index = step.actions()->getIndex();
auto it = index.find(where_column_name);
if (it == index.end())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", where_column_name);
auto filter_type = (*it)->result_type;
const auto & node = step.actions()->findInIndex(where_column_name);
auto filter_type = node.result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@ -1016,7 +1001,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.required_output.emplace_back(ast->getColumnName());
step.addRequiredOutput(ast->getColumnName());
getRootActions(ast, only_types, step.actions());
}
@ -1026,7 +1011,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
{
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag));
group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context)));
}
}
@ -1041,7 +1026,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & desc : aggregate_descriptions)
for (const auto & name : desc.argument_names)
step.required_output.emplace_back(name);
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
@ -1098,14 +1083,14 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.required_output.push_back(a->getColumnName());
step.addRequiredOutput(a->getColumnName());
}
}
// (2a) Required PARTITION BY and ORDER BY columns.
for (const auto & c : w.full_sort_description)
{
step.required_output.push_back(c.column_name);
step.addRequiredOutput(c.column_name);
}
}
}
@ -1120,7 +1105,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.required_output.push_back(select_query->having()->getColumnName());
step.addRequiredOutput(select_query->having()->getColumnName());
return true;
}
@ -1144,7 +1129,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.required_output.push_back(child->getColumnName());
step.addRequiredOutput(child->getColumnName());
}
}
@ -1172,7 +1157,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.required_output.push_back(order_expression->getColumnName());
step.addRequiredOutput(order_expression->getColumnName());
if (ast->with_fill)
with_fill = true;
@ -1184,7 +1169,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
{
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(actions_dag));
order_by_elements_actions.emplace_back(
std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context)));
}
}
@ -1215,7 +1201,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
NameSet aggregated_names;
for (const auto & column : aggregated_columns)
{
step.required_output.push_back(column.name);
step.addRequiredOutput(column.name);
aggregated_names.insert(column.name);
}
@ -1223,7 +1209,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
{
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.required_output.push_back(std::move(child_name));
step.addRequiredOutput(std::move(child_name));
}
return true;
@ -1271,7 +1257,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
}
result_columns.emplace_back(source_name, result_name);
step.required_output.push_back(result_columns.back().second);
step.addRequiredOutput(result_columns.back().second);
}
}
@ -1285,7 +1271,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.required_output.push_back(expr->getColumnName());
step.addRequiredOutput(expr->getColumnName());
}
@ -1325,18 +1311,26 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
if (!(add_aliases && project_result))
{
NameSet name_set(result_names.begin(), result_names.end());
/// We will not delete the original columns.
for (const auto & column_name_type : sourceColumns())
result_names.push_back(column_name_type.name);
{
if (name_set.count(column_name_type.name) == 0)
{
result_names.push_back(column_name_type.name);
name_set.insert(column_name_type.name);
}
}
actions_dag->removeUnusedActions(name_set);
}
actions_dag->removeUnusedActions(result_names);
return actions_dag;
}
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
return std::make_shared<ExpressionActions>(getActionsDAG(add_aliases, project_result));
return std::make_shared<ExpressionActions>(getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(context));
}
@ -1345,7 +1339,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
auto actions = std::make_shared<ActionsDAG>(NamesAndTypesList());
getRootActions(query, true, actions, true);
return std::make_shared<ExpressionActions>(actions);
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(context));
}
ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
@ -1390,7 +1384,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (!finalized)
{
finalize(chain, where_step_num);
finalize(chain, where_step_num, query);
finalized = true;
}
@ -1436,7 +1430,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
Block before_prewhere_sample = source_header;
if (sanitizeBlock(before_prewhere_sample))
{
ExpressionActions(prewhere_info->prewhere_actions).execute(before_prewhere_sample);
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context.getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
@ -1469,7 +1465,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
before_where_sample = source_header;
if (sanitizeBlock(before_where_sample))
{
ExpressionActions(before_where).execute(before_where_sample);
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context.getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
@ -1511,6 +1509,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
settings.optimize_read_in_order
&& storage && query.orderBy()
&& !query_analyzer.hasAggregation()
&& !query_analyzer.hasWindow()
&& !query.final()
&& join_allow_read_in_order;
@ -1559,11 +1558,14 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.required_output.push_back(child->getColumnName());
step.addRequiredOutput(child->getColumnName());
}
}
selected_columns = chain.getLastStep().required_output;
selected_columns.clear();
selected_columns.reserve(chain.getLastStep().required_output.size());
for (const auto & it : chain.getLastStep().required_output)
selected_columns.emplace_back(it.first);
has_order_by = query.orderBy() != nullptr;
before_order_by = query_analyzer.appendOrderBy(
@ -1589,21 +1591,22 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
checkActions();
}
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num)
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query)
{
size_t next_step_i = 0;
if (hasPrewhere())
{
const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++);
prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);
prewhere_info->prewhere_actions->projectInput(false);
NameSet columns_to_remove;
for (size_t i = 1; i < step.required_output.size(); ++i)
for (const auto & [name, can_remove] : step.required_output)
{
if (step.can_remove_required_output[i])
columns_to_remove.insert(step.required_output[i]);
if (name == prewhere_info->prewhere_column_name)
prewhere_info->remove_prewhere_column = can_remove;
else if (can_remove)
columns_to_remove.insert(name);
}
columns_to_remove_after_prewhere = std::move(columns_to_remove);
@ -1611,8 +1614,8 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
const ExpressionActionsChain::Step & step = *chain.steps.at(where_step_num);
remove_where_filter = step.can_remove_required_output.at(0);
auto where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}

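These hunks all follow from a single data-structure change: the step's list of required outputs, previously a plain vector paired with a parallel can_remove_required_output vector, becomes a map from column name to a can-remove flag, which is why the new finalize() iterates name/flag pairs and looks the WHERE column up by name. A self-contained sketch of that shape; Step here is a simplified stand-in for the real ExpressionActionsChain::Step, and the default flag value is an assumption:

#include <iostream>
#include <string>
#include <unordered_map>

// Toy stand-in for ExpressionActionsChain::Step after this diff: required
// outputs are keyed by column name, and the mapped bool records whether the
// column may be dropped once the step has run.
struct Step
{
    std::unordered_map<std::string, bool> required_output;

    // Assumption for this sketch: explicitly required outputs are not removable.
    void addRequiredOutput(const std::string & name) { required_output[name] = false; }
};

int main()
{
    Step step;
    step.required_output.emplace("greater(x, 0)", true); // removable filter column
    step.addRequiredOutput("x");

    for (const auto & [name, can_remove] : step.required_output)
        std::cout << name << ": can_remove=" << can_remove << '\n';

    // Lookup by name, as the new finalize() does for the WHERE column.
    std::cout << step.required_output.find("greater(x, 0)")->second << '\n';
}
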
View File

@ -247,7 +247,7 @@ struct ExpressionAnalysisResult
void removeExtraColumns() const;
void checkActions() const;
void finalize(const ExpressionActionsChain & chain, size_t where_step_num);
void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
};
/// SelectQuery specific ExpressionAnalyzer part.

View File

@ -596,8 +596,8 @@ static bool isCompilableFunction(const ActionsDAG::Node & node)
}
static LLVMFunction::CompileDAG getCompilableDAG(
ActionsDAG::Node * root,
std::vector<ActionsDAG::Node *> & children,
const ActionsDAG::Node * root,
ActionsDAG::NodeRawConstPtrs & children,
const std::unordered_set<const ActionsDAG::Node *> & used_in_result)
{
LLVMFunction::CompileDAG dag;
@ -605,7 +605,7 @@ static LLVMFunction::CompileDAG getCompilableDAG(
std::unordered_map<const ActionsDAG::Node *, size_t> positions;
struct Frame
{
ActionsDAG::Node * node;
const ActionsDAG::Node * node;
size_t next_child_to_visit = 0;
};
@ -621,7 +621,7 @@ static LLVMFunction::CompileDAG getCompilableDAG(
while (is_compilable_function && frame.next_child_to_visit < frame.node->children.size())
{
auto * child = frame.node->children[frame.next_child_to_visit];
const auto * child = frame.node->children[frame.next_child_to_visit];
if (positions.count(child))
++frame.next_child_to_visit;
@ -743,8 +743,7 @@ UInt128 LLVMFunction::CompileDAG::hash() const
static FunctionBasePtr compile(
const LLVMFunction::CompileDAG & dag,
size_t min_count_to_compile_expression,
const std::shared_ptr<CompiledExpressionCache> & compilation_cache)
size_t min_count_to_compile_expression)
{
static std::unordered_map<UInt128, UInt32, UInt128Hash> counter;
static std::mutex mutex;
@ -769,7 +768,7 @@ static FunctionBasePtr compile(
}
FunctionBasePtr fn;
if (compilation_cache)
if (auto * compilation_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&dag] ()
{
@ -790,7 +789,7 @@ static FunctionBasePtr compile(
return fn;
}
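
For reference, the gating that compile() performs before doing any JIT work: a process-wide counter per expression hash, so an expression is only compiled once it has been requested min_count_to_compile_expression times. A minimal standalone sketch of the same pattern, with a string key standing in for the CompileDAG hash:

#include <cstddef>
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>

// Returns true once `key` has been seen at least `min_count` times; until
// then the caller should fall back to the interpreted path.
static bool hotEnoughToCompile(const std::string & key, size_t min_count)
{
    static std::unordered_map<std::string, size_t> counter;
    static std::mutex mutex;

    std::lock_guard<std::mutex> lock(mutex);
    return ++counter[key] >= min_count;
}

int main()
{
    for (int i = 0; i < 4; ++i)
        std::cout << "call " << i << ": compile=" << hotEnoughToCompile("plus(a, b)", 3) << '\n';
}
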
void ActionsDAG::compileFunctions()
void ActionsDAG::compileFunctions(size_t min_count_to_compile_expression)
{
struct Data
{
@ -815,7 +814,7 @@ void ActionsDAG::compileFunctions()
struct Frame
{
Node * node;
const Node * node;
size_t next_child_to_visit = 0;
};
@ -834,7 +833,7 @@ void ActionsDAG::compileFunctions()
while (frame.next_child_to_visit < frame.node->children.size())
{
auto * child = frame.node->children[frame.next_child_to_visit];
const auto * child = frame.node->children[frame.next_child_to_visit];
if (visited.count(child))
++frame.next_child_to_visit;
@ -871,10 +870,10 @@ void ActionsDAG::compileFunctions()
if (should_compile)
{
std::vector<Node *> new_children;
NodeRawConstPtrs new_children;
auto dag = getCompilableDAG(frame.node, new_children, used_in_result);
if (auto fn = compile(dag, settings.min_count_to_compile_expression, compilation_cache))
if (auto fn = compile(dag, min_count_to_compile_expression))
{
/// Replace the current node with the compiled function.
@ -883,12 +882,13 @@ void ActionsDAG::compileFunctions()
for (const auto * child : new_children)
arguments.emplace_back(child->column, child->result_type, child->result_name);
frame.node->type = ActionsDAG::ActionType::FUNCTION;
frame.node->function_base = fn;
frame.node->function = fn->prepare(arguments);
frame.node->children.swap(new_children);
frame.node->is_function_compiled = true;
frame.node->column = nullptr; /// Just in case.
auto * frame_node = const_cast<Node *>(frame.node);
frame_node->type = ActionsDAG::ActionType::FUNCTION;
frame_node->function_base = fn;
frame_node->function = fn->prepare(arguments);
frame_node->children.swap(new_children);
frame_node->is_function_compiled = true;
frame_node->column = nullptr; /// Just in case.
}
}
}
@ -900,6 +900,25 @@ void ActionsDAG::compileFunctions()
}
}
CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
{
static CompiledExpressionCacheFactory factory;
return factory;
}
void CompiledExpressionCacheFactory::init(size_t cache_size)
{
if (cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");
cache = std::make_unique<CompiledExpressionCache>(cache_size);
}
CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()
{
return cache.get();
}
}
#endif

View File

@ -100,6 +100,18 @@ public:
using Base::Base;
};
class CompiledExpressionCacheFactory
{
private:
std::unique_ptr<CompiledExpressionCache> cache;
public:
static CompiledExpressionCacheFactory & instance();
void init(size_t cache_size);
CompiledExpressionCache * tryGetCache();
};
}
#endif

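Putting the new factory together with the SYSTEM DROP COMPILED EXPRESSION CACHE hunk further down, the intended lifecycle is: init() once at startup with the configured cache size, tryGetCache() at every use site (nullptr means JIT caching is disabled), reset() on the SYSTEM query. A stripped-down sketch of that lifecycle; Cache here is a placeholder for CompiledExpressionCache:

#include <cstddef>
#include <iostream>
#include <memory>
#include <stdexcept>

struct Cache
{
    explicit Cache(size_t size_) : size(size_) {}
    void reset() { std::cout << "cache of size " << size << " dropped\n"; }
    size_t size;
};

class CacheFactory
{
    std::unique_ptr<Cache> cache;

public:
    static CacheFactory & instance()
    {
        static CacheFactory factory; // function-local static, like the real factory
        return factory;
    }

    void init(size_t cache_size)
    {
        if (cache)
            throw std::logic_error("cache was already initialized");
        cache = std::make_unique<Cache>(cache_size);
    }

    Cache * tryGetCache() { return cache.get(); } // may be nullptr
};

int main()
{
    CacheFactory::instance().init(1024);
    if (auto * cache = CacheFactory::instance().tryGetCache())
        cache->reset();
}
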
View File

@ -1,6 +1,10 @@
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Databases/IDatabase.h>
#include <Storages/IStorage.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
@ -13,10 +17,15 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Must not acquire the Context lock in the constructor to avoid the possibility of deadlocks.
ExternalDictionariesLoader::ExternalDictionariesLoader(Context & context_)
ExternalDictionariesLoader::ExternalDictionariesLoader(Context & global_context_)
: ExternalLoader("external dictionary", &Poco::Logger::get("ExternalDictionariesLoader"))
, context(context_)
, global_context(global_context_)
{
setConfigSettings({"dictionary", "name", "database", "uuid"});
enableAsyncLoading(true);
@ -31,9 +40,88 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create(
/// For dictionaries from databases (created with DDL queries) we have to perform
/// additional checks, so we identify them here.
bool dictionary_from_database = !repository_name.empty();
return DictionaryFactory::instance().create(name, config, key_in_config, context, dictionary_from_database);
return DictionaryFactory::instance().create(name, config, key_in_config, global_context, dictionary_from_database);
}
ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
return std::static_pointer_cast<const IDictionaryBase>(load(resolved_dictionary_name));
}
ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
return std::static_pointer_cast<const IDictionaryBase>(tryLoad(resolved_dictionary_name));
}
void ExternalDictionariesLoader::reloadDictionary(const std::string & dictionary_name, const Context & context) const
{
std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, context.getCurrentDatabase());
loadOrReload(resolved_dictionary_name);
}
DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std::string & dictionary_name, const Context & query_context) const
{
std::string resolved_name = resolveDictionaryName(dictionary_name, query_context.getCurrentDatabase());
auto load_result = getLoadResult(resolved_name);
if (!load_result.config)
throw Exception("Dictionary " + backQuote(dictionary_name) + " config not found", ErrorCodes::BAD_ARGUMENTS);
return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
}
std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const
{
std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name);
bool has_dictionary = has(resolved_name);
if (!has_dictionary)
{
/// If the dictionary was not found and no database was explicitly specified,
/// we can qualify the dictionary name with the current database name.
/// This helps when the dictionary was created with DDL and lives in the current database.
if (dictionary_name.find('.') == std::string::npos)
{
String dictionary_name_with_database = current_database_name + '.' + dictionary_name;
resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name_with_database);
has_dictionary = has(resolved_name);
}
}
if (!has_dictionary)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary ({}) not found", backQuote(dictionary_name));
return resolved_name;
}
std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const
{
/// If it's a dictionary from an Atomic database, we need to convert the qualified name to a UUID.
/// Try to split the name and get the id from the associated StorageDictionary.
/// If anything goes wrong, return the name as is.
auto pos = name.find('.');
if (pos == std::string::npos || name.find('.', pos + 1) != std::string::npos)
return name;
std::string maybe_database_name = name.substr(0, pos);
std::string maybe_table_name = name.substr(pos + 1);
auto [db, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, global_context);
if (!db)
return name;
assert(table);
if (db->getUUID() == UUIDHelpers::Nil)
return name;
if (table->getName() != "Dictionary")
return name;
return toString(table->getStorageID().uuid);
}
DictionaryStructure
ExternalDictionariesLoader::getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config)

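A standalone sketch of the lookup order the two resolve functions above implement: try the name as given, and only for a bare (dot-free) name fall back to qualifying it with the current database. The Atomic-database UUID rewrite is left out here, and the set below stands in for the loader's registry:

#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

static std::string resolveDictionaryName(
    const std::set<std::string> & loaded,
    const std::string & dictionary_name,
    const std::string & current_database)
{
    if (loaded.count(dictionary_name))
        return dictionary_name;

    // A bare name may refer to a DDL dictionary in the current database.
    if (dictionary_name.find('.') == std::string::npos)
    {
        std::string qualified = current_database + '.' + dictionary_name;
        if (loaded.count(qualified))
            return qualified;
    }

    throw std::runtime_error("Dictionary (`" + dictionary_name + "`) not found");
}

int main()
{
    std::set<std::string> loaded{"db1.dict1", "global_dict"};
    std::cout << resolveDictionaryName(loaded, "dict1", "db1") << '\n';       // db1.dict1
    std::cout << resolveDictionaryName(loaded, "global_dict", "db1") << '\n'; // global_dict
}
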
View File

@ -1,9 +1,11 @@
#pragma once
#include <Dictionaries/IDictionary.h>
#include <Interpreters/ExternalLoader.h>
#include <memory>
#include <Common/quoteString.h>
#include <Interpreters/ExternalLoader.h>
#include <Dictionaries/IDictionary.h>
namespace DB
{
class Context;
@ -16,24 +18,18 @@ public:
using DictPtr = std::shared_ptr<const IDictionaryBase>;
/// Dictionaries will be loaded immediately and then updated in a separate thread, every 'reload_period' seconds.
explicit ExternalDictionariesLoader(Context & context_);
explicit ExternalDictionariesLoader(Context & global_context_);
DictPtr getDictionary(const std::string & name) const
{
return std::static_pointer_cast<const IDictionaryBase>(load(name));
}
DictPtr getDictionary(const std::string & dictionary_name, const Context & context) const;
DictPtr tryGetDictionary(const std::string & name) const
{
return std::static_pointer_cast<const IDictionaryBase>(tryLoad(name));
}
DictPtr tryGetDictionary(const std::string & dictionary_name, const Context & context) const;
bool hasDictionary(const std::string & name) const
{
return has(name);
}
void reloadDictionary(const std::string & dictionary_name, const Context & context) const;
DictionaryStructure getDictionaryStructure(const std::string & dictionary_name, const Context & context) const;
static DictionaryStructure getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config = "dictionary");
static DictionaryStructure getDictionaryStructure(const ObjectConfig & config);
static void resetAll();
@ -42,11 +38,16 @@ protected:
LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config,
const std::string & key_in_config, const std::string & repository_name) const override;
std::string resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const;
/// Try to convert a qualified dictionary name to a persistent UUID
std::string resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const;
friend class StorageSystemDictionaries;
friend class DatabaseDictionary;
private:
Context & context;
Context & global_context;
};
}

View File

@ -14,6 +14,8 @@
#include <Storages/StorageView.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/printPipeline.h>
namespace DB
@ -251,7 +253,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
interpreter.buildQueryPlan(plan);
if (settings.optimize)
plan.optimize(QueryPlanOptimizationSettings(context.getSettingsRef()));
plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
plan.explainPlan(buf, settings.query_plan_options);
}
@ -265,7 +267,9 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions());
interpreter.buildQueryPlan(plan);
auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()));
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
if (settings.graph)
{

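The change repeated in this and the following interpreter files is one idiom: raw Settings are no longer threaded through plan-building calls; each component instead defines a small settings bundle with static fromSettings()/fromContext() constructors (QueryPlanOptimizationSettings, BuildQueryPipelineSettings, ExpressionActionsSettings). A minimal sketch of the idiom with placeholder types and fields:

#include <cstddef>
#include <iostream>

struct Settings { size_t max_threads = 8; };
struct Context
{
    Settings settings;
    const Settings & getSettingsRef() const { return settings; }
};

// One narrow settings bundle per component, built from the query context.
struct PipelineSettings
{
    size_t max_threads = 0;

    static PipelineSettings fromSettings(const Settings & from)
    {
        PipelineSettings res;
        res.max_threads = from.max_threads;
        return res;
    }

    static PipelineSettings fromContext(const Context & from) { return fromSettings(from.getSettingsRef()); }
};

int main()
{
    Context context;
    std::cout << PipelineSettings::fromContext(context).max_threads << '\n';
}
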
View File

@ -250,7 +250,7 @@ BlockIO InterpreterInsertQuery::execute()
}
}
res.pipeline = QueryPipeline::unitePipelines(std::move(pipelines), {});
res.pipeline = QueryPipeline::unitePipelines(std::move(pipelines), {}, ExpressionActionsSettings::fromContext(context));
}
}
@ -378,7 +378,7 @@ BlockIO InterpreterInsertQuery::execute()
res.pipeline.getHeader().getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(context));
res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{

View File

@ -62,6 +62,7 @@
#include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/WindowStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/AggregatingTransform.h>
@ -142,12 +143,11 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName();
actions->removeUnusedActions({column_name});
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
ActionsDAG::Index index;
for (const auto * node : actions->getInputs())
actions->addNodeToIndex(node);
actions->getIndex().push_back(node);
return column_name;
}
@ -561,7 +561,9 @@ BlockIO InterpreterSelectQuery::execute()
buildQueryPlan(query_plan);
res.pipeline = std::move(*query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef())));
res.pipeline = std::move(*query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context),
BuildQueryPipelineSettings::fromContext(*context)));
return res;
}
@ -606,7 +608,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
if (analysis_result.prewhere_info)
{
ExpressionActions(analysis_result.prewhere_info->prewhere_actions).execute(header);
ExpressionActions(
analysis_result.prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromContext(*context)).execute(header);
if (analysis_result.prewhere_info->remove_prewhere_column)
header.erase(analysis_result.prewhere_info->prewhere_column_name);
}
@ -1668,19 +1672,19 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
query_info.syntax_analyzer_result = syntax_analyzer_result;
query_info.sets = query_analyzer->getPreparedSets();
auto actions_settings = ExpressionActionsSettings::fromContext(*context);
if (prewhere_info)
{
query_info.prewhere_info = std::make_shared<PrewhereInfo>();
query_info.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions);
query_info.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings);
if (prewhere_info->row_level_filter_actions)
query_info.prewhere_info->row_level_filter = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions);
query_info.prewhere_info->row_level_filter = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions, actions_settings);
if (prewhere_info->alias_actions)
query_info.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(prewhere_info->alias_actions);
query_info.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(prewhere_info->alias_actions, actions_settings);
if (prewhere_info->remove_columns_actions)
query_info.prewhere_info->remove_columns_actions = std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions);
query_info.prewhere_info->remove_columns_actions = std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions, actions_settings);
query_info.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
query_info.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;

View File

@ -11,6 +11,7 @@
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Common/typeid_cast.h>
#include <Interpreters/InDepthNodeVisitor.h>
@ -296,7 +297,9 @@ BlockIO InterpreterSelectWithUnionQuery::execute()
QueryPlan query_plan;
buildQueryPlan(query_plan);
auto pipeline = query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef()));
auto pipeline = query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(*context),
BuildQueryPipelineSettings::fromContext(*context));
res.pipeline = std::move(*pipeline);
res.pipeline.addInterpreterContext(context);

View File

@ -24,6 +24,7 @@
#include <Interpreters/MetricLog.h>
#include <Interpreters/AsynchronousMetricLog.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/ExpressionJIT.h>
#include <Access/ContextAccess.h>
#include <Access/AllowedClientHosts.h>
#include <Databases/IDatabase.h>
@ -270,14 +271,17 @@ BlockIO InterpreterSystemQuery::execute()
#if USE_EMBEDDED_COMPILER
case Type::DROP_COMPILED_EXPRESSION_CACHE:
context.checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
system_context.dropCompiledExpressionCache();
if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
cache->reset();
break;
#endif
case Type::RELOAD_DICTIONARY:
{
context.checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY);
system_context.getExternalDictionariesLoader().loadOrReload(
DatabaseCatalog::instance().resolveDictionaryName(query.target_dictionary));
auto & external_dictionaries_loader = system_context.getExternalDictionariesLoader();
external_dictionaries_loader.reloadDictionary(query.target_dictionary, context);
ExternalDictionariesLoader::resetAll();
break;
}

View File

@ -673,16 +673,24 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
for (const auto & kv : stage.column_to_updated)
stage.analyzer->appendExpression(actions_chain, kv.second, dry_run);
auto & actions = actions_chain.getLastStep().actions();
for (const auto & kv : stage.column_to_updated)
{
actions_chain.getLastStep().actions()->addAlias(
kv.second->getColumnName(), kv.first, /* can_replace = */ true);
auto column_name = kv.second->getColumnName();
const auto & dag_node = actions->findInIndex(column_name);
const auto & alias = actions->addAlias(dag_node, kv.first);
actions->addOrReplaceInIndex(alias);
}
}
/// Remove all intermediate columns.
actions_chain.addStep();
actions_chain.getLastStep().required_output.assign(stage.output_columns.begin(), stage.output_columns.end());
actions_chain.getLastStep().required_output.clear();
for (const auto & name : stage.output_columns)
actions_chain.getLastStep().addRequiredOutput(name);
actions_chain.getLastActions();
actions_chain.finalize();
@ -755,7 +763,10 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto
}
}
auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef()));
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
pipeline->addSimpleTransform([&](const Block & header)
{
return std::make_shared<MaterializingTransform>(header);

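A toy model of the addAlias/addOrReplaceInIndex pair used in the UPDATE hunk above: the computed expression node is wrapped under the target column's name, and that name's entry in the DAG index is then overwritten. Node and the maps below are simplified stand-ins for the real ActionsDAG structures:

#include <iostream>
#include <map>
#include <memory>
#include <string>

// Toy stand-in for an ActionsDAG node: aliases point at the node they rename.
struct Node
{
    std::string result_name;
    const Node * source = nullptr;
};

int main()
{
    std::map<std::string, std::unique_ptr<Node>> nodes; // owns all nodes
    std::map<std::string, const Node *> index;          // result name -> producing node

    // The expression computed for `ALTER TABLE ... UPDATE value = value + 1`.
    nodes["plus(value, 1)"] = std::make_unique<Node>(Node{"plus(value, 1)", nullptr});
    index["plus(value, 1)"] = nodes["plus(value, 1)"].get();

    // addAlias: wrap the expression under the updated column's name;
    // addOrReplaceInIndex: make that name resolve to the alias from now on.
    const Node * expr = index.at("plus(value, 1)");
    nodes["value"] = std::make_unique<Node>(Node{"value", expr});
    index["value"] = nodes["value"].get();

    std::cout << "`value` now reads from `" << index["value"]->source->result_name << "`\n";
}
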
View File

@ -89,7 +89,7 @@ struct StorageID
const String & config_prefix);
/// If the dictionary has a UUID, then use it as the dictionary name in ExternalLoader to allow dictionary renaming.
/// DatabaseCatalog::resolveDictionaryName(...) should be used to access such dictionaries by name.
/// ExternalDictionariesLoader::resolveDictionaryName(...) should be used to access such dictionaries by name.
String getInternalDictionaryName() const;
private:

View File

@ -51,7 +51,7 @@ void SubqueryForSet::addJoinActions(ExpressionActionsPtr actions)
auto new_dag = ActionsDAG::merge(
std::move(*joined_block_actions->getActionsDAG().clone()),
std::move(*actions->getActionsDAG().clone()));
joined_block_actions = std::make_shared<ExpressionActions>(new_dag);
joined_block_actions = std::make_shared<ExpressionActions>(new_dag, actions->getSettings());
}
}

View File

@ -135,8 +135,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
const auto & dict_name = dict_name_ast->value.safeGet<String>();
const auto & attr_name = attr_name_ast->value.safeGet<String>();
String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dict_name);
const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(resolved_name);
const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(dict_name, context);
if (!dict_ptr->isInjective(attr_name))
{
++i;

View File

@ -21,9 +21,12 @@ ActionsDAGPtr addMissingDefaults(
const ColumnsDescription & columns,
const Context & context)
{
auto actions = std::make_shared<ActionsDAG>(header.getColumnsWithTypeAndName());
auto & index = actions->getIndex();
/// For missing columns of a nested structure, we need to create not a column of empty arrays, but a column of arrays of the correct lengths.
/// First, remember the offset columns for all arrays in the block.
std::map<String, Names> nested_groups;
std::map<String, ActionsDAG::NodeRawConstPtrs> nested_groups;
for (size_t i = 0, size = header.columns(); i < size; ++i)
{
@ -35,14 +38,12 @@ ActionsDAGPtr addMissingDefaults(
auto & group = nested_groups[offsets_name];
if (group.empty())
group.push_back({});
group.push_back(nullptr);
group.push_back(elem.name);
group.push_back(actions->getInputs()[i]);
}
}
auto actions = std::make_shared<ActionsDAG>(header.getColumnsWithTypeAndName());
FunctionOverloadResolverPtr func_builder_replicate = FunctionFactory::instance().get("replicate", context);
/// We take the given columns from the input block and the missing columns without a default value
@ -61,11 +62,11 @@ ActionsDAGPtr addMissingDefaults(
DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*column.type).getNestedType();
ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0);
const auto & constant = actions->addColumn({std::move(nested_column), nested_type, column.name}, true);
const auto & constant = actions->addColumn({std::move(nested_column), nested_type, column.name});
auto & group = nested_groups[offsets_name];
group[0] = constant.result_name;
actions->addFunction(func_builder_replicate, group, constant.result_name, context, true);
group[0] = &constant;
index.push_back(&actions->addFunction(func_builder_replicate, group, constant.result_name));
continue;
}
@ -74,7 +75,8 @@ ActionsDAGPtr addMissingDefaults(
* it can be full (or the interpreter may decide that it is constant everywhere).
*/
auto new_column = column.type->createColumnConstWithDefaultValue(0);
actions->addColumn({std::move(new_column), column.type, column.name}, true, true);
const auto * col = &actions->addColumn({std::move(new_column), column.type, column.name});
index.push_back(&actions->materializeNode(*col));
}
/// Computes explicitly specified values by default and materialized columns.

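The point of the nested_groups bookkeeping above, as a standalone computation: a missing Nested column cannot simply be filled with empty arrays, because its per-row array lengths must match the sibling columns that share the same offsets. Given those offsets, replicating a default scalar yields arrays of the correct lengths (what the `replicate` function achieves in the diff); this sketch performs that replication by hand:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // Cumulative offsets of an existing sibling array column: row sizes 2, 0, 3.
    std::vector<size_t> offsets{2, 2, 5};

    // Build the missing column as arrays of matching lengths filled with a
    // default value, instead of empty arrays.
    std::vector<std::vector<int>> missing_column;
    size_t prev = 0;
    for (size_t offset : offsets)
    {
        missing_column.emplace_back(offset - prev, 0); // `offset - prev` default values
        prev = offset;
    }

    for (const auto & row : missing_column)
        std::cout << "row size = " << row.size() << '\n'; // 2, 0, 3
}
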
View File

@ -122,7 +122,7 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require
if (auto dag = createExpressions(block, conversion_expr_list, true, required_columns, context))
{
auto expression = std::make_shared<ExpressionActions>(std::move(dag));
auto expression = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context));
expression->execute(block);
}
}

View File

@ -50,6 +50,7 @@ SRCS(
EmbeddedDictionaries.cpp
ExecuteScalarSubqueriesVisitor.cpp
ExpressionActions.cpp
ExpressionActionsSettings.cpp
ExpressionAnalyzer.cpp
ExternalDictionariesLoader.cpp
ExternalLoader.cpp

View File

@ -212,6 +212,7 @@ void QueryPipeline::setOutputFormat(ProcessorPtr output)
QueryPipeline QueryPipeline::unitePipelines(
std::vector<std::unique_ptr<QueryPipeline>> pipelines,
const Block & common_header,
const ExpressionActionsSettings & settings,
size_t max_threads_limit,
Processors * collected_processors)
{
@ -234,7 +235,7 @@ QueryPipeline QueryPipeline::unitePipelines(
pipeline.getHeader().getColumnsWithTypeAndName(),
common_header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag);
auto actions = std::make_shared<ExpressionActions>(actions_dag, settings);
pipeline.addSimpleTransform([&](const Block & header)
{

View File

@ -28,6 +28,8 @@ using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
struct SizeLimits;
struct ExpressionActionsSettings;
class QueryPipeline
{
public:
@ -89,6 +91,7 @@ public:
static QueryPipeline unitePipelines(
std::vector<std::unique_ptr<QueryPipeline>> pipelines,
const Block & common_header,
const ExpressionActionsSettings & settings,
size_t max_threads_limit = 0,
Processors * collected_processors = nullptr);

View File

@ -28,7 +28,7 @@ AddingDelayedSourceStep::AddingDelayedSourceStep(
{
}
void AddingDelayedSourceStep::transformPipeline(QueryPipeline & pipeline)
void AddingDelayedSourceStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
source->setQueryPlanStep(this);
pipeline.addDelayedStream(source);

View File

@ -19,7 +19,7 @@ public:
String getName() const override { return "AddingDelayedSource"; }
void transformPipeline(QueryPipeline & pipeline) override;
void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override;
private:
ProcessorPtr source;

View File

@ -46,7 +46,7 @@ AggregatingStep::AggregatingStep(
{
}
void AggregatingStep::transformPipeline(QueryPipeline & pipeline)
void AggregatingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
QueryPipelineProcessorsCollector collector(pipeline, this);

View File

@ -27,7 +27,7 @@ public:
String getName() const override { return "Aggregating"; }
void transformPipeline(QueryPipeline & pipeline) override;
void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override;
void describeActions(FormatSettings &) const override;
void describePipeline(FormatSettings & settings) const override;

View File

@ -46,7 +46,7 @@ void ArrayJoinStep::updateInputStream(DataStream input_stream, Block result_head
res_header = std::move(result_header);
}
void ArrayJoinStep::transformPipeline(QueryPipeline & pipeline)
void ArrayJoinStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings)
{
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type)
{
@ -60,7 +60,7 @@ void ArrayJoinStep::transformPipeline(QueryPipeline & pipeline)
pipeline.getHeader().getColumnsWithTypeAndName(),
res_header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name);
auto actions = std::make_shared<ExpressionActions>(actions_dag);
auto actions = std::make_shared<ExpressionActions>(actions_dag, settings.getActionsSettings());
pipeline.addSimpleTransform([&](const Block & header)
{

View File

@ -13,7 +13,7 @@ public:
explicit ArrayJoinStep(const DataStream & input_stream_, ArrayJoinActionPtr array_join_);
String getName() const override { return "ArrayJoin"; }
void transformPipeline(QueryPipeline & pipeline) override;
void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) override;
void describeActions(FormatSettings & settings) const override;

View File

@ -0,0 +1,21 @@
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Core/Settings.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Context.h>
namespace DB
{
BuildQueryPipelineSettings BuildQueryPipelineSettings::fromSettings(const Settings & from)
{
BuildQueryPipelineSettings settings;
settings.actions_settings = ExpressionActionsSettings::fromSettings(from);
return settings;
}
BuildQueryPipelineSettings BuildQueryPipelineSettings::fromContext(const Context & from)
{
return fromSettings(from.getSettingsRef());
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <cstddef>
#include <Interpreters/ExpressionActionsSettings.h>
namespace DB
{
struct Settings;
class Context;
struct BuildQueryPipelineSettings
{
ExpressionActionsSettings actions_settings;
const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; }
static BuildQueryPipelineSettings fromSettings(const Settings & from);
static BuildQueryPipelineSettings fromContext(const Context & from);
};
}

View File

@ -2,6 +2,7 @@
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
#include <IO/Operators.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
@ -42,7 +43,7 @@ CreatingSetStep::CreatingSetStep(
{
}
void CreatingSetStep::transformPipeline(QueryPipeline & pipeline)
void CreatingSetStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, context);
}
@ -72,7 +73,7 @@ CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_)
assertBlocksHaveEqualStructure(output_stream->header, input_streams[i].header, "CreatingSets");
}
QueryPipelinePtr CreatingSetsStep::updatePipeline(QueryPipelines pipelines)
QueryPipelinePtr CreatingSetsStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings)
{
if (pipelines.empty())
throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR);
@ -88,7 +89,7 @@ QueryPipelinePtr CreatingSetsStep::updatePipeline(QueryPipelines pipelines)
if (pipelines.size() > 1)
{
QueryPipelineProcessorsCollector collector(delayed_pipeline, this);
delayed_pipeline = QueryPipeline::unitePipelines(std::move(pipelines), output_stream->header);
delayed_pipeline = QueryPipeline::unitePipelines(std::move(pipelines), output_stream->header, settings.getActionsSettings());
processors = collector.detachProcessors();
}
else

Some files were not shown because too many files have changed in this diff.