mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-25 03:00:49 +00:00
Merge branch 'master' into fix-bad-cast
This commit is contained in:
commit
cecf03f4cb
@ -13,3 +13,6 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
|
||||
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
* [ClickHouse Meetup by ByteDance (online)](https://www.meetup.com/ByteDanceDev-group/events/279543467/) on 23 July 2021.
|
||||
|
@ -194,6 +194,10 @@ continue
|
||||
jobs
|
||||
pstree -aspgT
|
||||
|
||||
server_exit_code=0
|
||||
wait $server_pid || server_exit_code=$?
|
||||
echo "Server exit code is $server_exit_code"
|
||||
|
||||
# Make files with status and description we'll show for this check on Github.
|
||||
task_exit_code=$fuzzer_exit_code
|
||||
if [ "$server_died" == 1 ]
|
||||
|
@ -1196,7 +1196,7 @@ create table changes engine File(TSV, 'metrics/changes.tsv') as
|
||||
if(left > right, left / right, right / left) times_diff
|
||||
from metrics
|
||||
group by metric
|
||||
having abs(diff) > 0.05 and isFinite(diff)
|
||||
having abs(diff) > 0.05 and isFinite(diff) and isFinite(times_diff)
|
||||
)
|
||||
order by diff desc
|
||||
;
|
||||
|
@ -30,21 +30,25 @@ Other common parameters are inherited from clickhouse-server config (`listen_hos
|
||||
|
||||
Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section:
|
||||
|
||||
- `operation_timeout_ms` — timeout for a single client operation
|
||||
- `session_timeout_ms` — timeout for client session
|
||||
- `dead_session_check_period_ms` — how often clickhouse-keeper check dead sessions and remove them
|
||||
- `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers
|
||||
- `election_timeout_lower_bound_ms` — if follower didn't receive heartbeats from the leader in this interval, then it can initiate leader election
|
||||
- `election_timeout_upper_bound_ms` — if follower didn't receive heartbeats from the leader in this interval, then it must initiate leader election
|
||||
- `rotate_log_storage_interval` — how many logs to store in a single file
|
||||
- `reserved_log_items` — how many coordination logs to store before compaction
|
||||
- `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of logs)
|
||||
- `snapshots_to_keep` — how many snapshots to keep
|
||||
- `stale_log_gap` — the threshold when leader consider follower as stale and send snapshot to it instead of logs
|
||||
- `force_sync` — call `fsync` on each write to coordination log
|
||||
- `raft_logs_level` — text logging level about coordination (trace, debug, and so on)
|
||||
- `shutdown_timeout` — wait to finish internal connections and shutdown
|
||||
- `startup_timeout` — if the server doesn't connect to other quorum participants in the specified timeout it will terminate
|
||||
- `operation_timeout_ms` — timeout for a single client operation (default: 10000)
|
||||
- `session_timeout_ms` — timeout for client session (default: 30000)
|
||||
- `dead_session_check_period_ms` — how often clickhouse-keeper checks dead sessions and removes them (default: 500)
|
||||
- `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers (default: 500)
|
||||
- `election_timeout_lower_bound_ms` — if follower didn't receive heartbeats from the leader in this interval, then it can initiate leader election (default: 1000)
|
||||
- `election_timeout_upper_bound_ms` — if follower didn't receive heartbeats from the leader in this interval, then it must initiate leader election (default: 2000)
|
||||
- `rotate_log_storage_interval` — how many log records to store in a single file (default: 100000)
|
||||
- `reserved_log_items` — how many coordination log records to store before compaction (default: 100000)
|
||||
- `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of records in logs) (default: 100000)
|
||||
- `snapshots_to_keep` — how many snapshots to keep (default: 3)
|
||||
- `stale_log_gap` — the threshold when the leader considers a follower as stale and sends a snapshot to it instead of logs (default: 10000)
|
||||
- `fresh_log_gap` - when a node becomes fresh (default: 200)
|
||||
- `max_requests_batch_size` - max size of batch in requests count before it will be sent to RAFT (default: 100)
|
||||
- `force_sync` — call `fsync` on each write to coordination log (default: true)
|
||||
- `quorum_reads` - execute read requests as writes through the whole RAFT consensus with similar speed (default: false)
|
||||
- `raft_logs_level` — text logging level about coordination (trace, debug, and so on) (default: system default)
|
||||
- `auto_forwarding` - allow to forward write requests from followers to leader (default: true)
|
||||
- `shutdown_timeout` — wait to finish internal connections and shutdown (ms) (default: 5000)
|
||||
- `startup_timeout` — if the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000)
|
||||
|
||||
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains server descriptions. The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The main parameters for each `<server>` are:
|
||||
|
||||
|
@ -34,6 +34,7 @@ Configuration template:
|
||||
<min_part_size>...</min_part_size>
|
||||
<min_part_size_ratio>...</min_part_size_ratio>
|
||||
<method>...</method>
|
||||
<level>...</level>
|
||||
</case>
|
||||
...
|
||||
</compression>
|
||||
@ -43,7 +44,8 @@ Configuration template:
|
||||
|
||||
- `min_part_size` – The minimum size of a data part.
|
||||
- `min_part_size_ratio` – The ratio of the data part size to the table size.
|
||||
- `method` – Compression method. Acceptable values: `lz4` or `zstd`.
|
||||
- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`.
|
||||
- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table/#create-query-general-purpose-codecs).
|
||||
|
||||
You can configure multiple `<case>` sections.
|
||||
|
||||
@ -62,6 +64,7 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
|
||||
<min_part_size>10000000000</min_part_size>
|
||||
<min_part_size_ratio>0.01</min_part_size_ratio>
|
||||
<method>zstd</method>
|
||||
<level>1</level>
|
||||
</case>
|
||||
</compression>
|
||||
```
|
||||
@ -713,7 +716,7 @@ Keys for server/client settings:
|
||||
- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS.
|
||||
- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
|
||||
- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .
|
||||
|
@ -2024,13 +2024,13 @@ Default value: 16.
|
||||
|
||||
## merge_selecting_sleep_ms {#merge_selecting_sleep_ms}
|
||||
|
||||
Sleep time for merge selecting when no part selected, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters
|
||||
Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to Zookeeper in large-scale clusters.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Any positive integer.
|
||||
|
||||
Default value: 5000
|
||||
Default value: `5000`.
|
||||
|
||||
## parallel_distributed_insert_select {#parallel_distributed_insert_select}
|
||||
|
||||
|
@ -598,7 +598,7 @@ SOURCE(CLICKHOUSE(
|
||||
table 'ids'
|
||||
where 'id=10'
|
||||
secure 1
|
||||
))
|
||||
));
|
||||
```
|
||||
|
||||
Setting fields:
|
||||
|
@ -87,7 +87,7 @@ SELECT
|
||||
dictGetOrDefault('ext-dict-test', 'c1', number + 1, toUInt32(number * 10)) AS val,
|
||||
toTypeName(val) AS type
|
||||
FROM system.numbers
|
||||
LIMIT 3
|
||||
LIMIT 3;
|
||||
```
|
||||
|
||||
``` text
|
||||
|
@ -464,7 +464,7 @@ SSLのサポートは以下によって提供されます `libpoco` 図書館
|
||||
- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- fips – Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
|
||||
- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
|
||||
- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .
|
||||
|
@ -34,6 +34,7 @@ ClickHouse перезагружает встроенные словари с з
|
||||
<min_part_size>...</min_part_size>
|
||||
<min_part_size_ratio>...</min_part_size_ratio>
|
||||
<method>...</method>
|
||||
<level>...</level>
|
||||
</case>
|
||||
...
|
||||
</compression>
|
||||
@ -43,7 +44,8 @@ ClickHouse перезагружает встроенные словари с з
|
||||
|
||||
- `min_part_size` - Минимальный размер части таблицы.
|
||||
- `min_part_size_ratio` - Отношение размера минимальной части таблицы к полному размеру таблицы.
|
||||
- `method` - Метод сжатия. Возможные значения: `lz4`, `zstd`.
|
||||
- `method` - Метод сжатия. Возможные значения: `lz4`, `lz4hc`, `zstd`.
|
||||
- `level` – Уровень сжатия. См. [Кодеки](../../sql-reference/statements/create/table/#create-query-common-purpose-codecs).
|
||||
|
||||
Можно сконфигурировать несколько разделов `<case>`.
|
||||
|
||||
@ -62,6 +64,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
<min_part_size>10000000000</min_part_size>
|
||||
<min_part_size_ratio>0.01</min_part_size_ratio>
|
||||
<method>zstd</method>
|
||||
<level>1</level>
|
||||
</case>
|
||||
</compression>
|
||||
```
|
||||
|
@ -1838,7 +1838,7 @@ ClickHouse генерирует исключение
|
||||
|
||||
Тип: unsigned int
|
||||
|
||||
озможные значения: 32 (32 байта) - 1073741824 (1 GiB)
|
||||
Возможные значения: 32 (32 байта) - 1073741824 (1 GiB)
|
||||
|
||||
Значение по умолчанию: 32768 (32 KiB)
|
||||
|
||||
@ -1852,6 +1852,16 @@ ClickHouse генерирует исключение
|
||||
|
||||
Значение по умолчанию: 16.
|
||||
|
||||
## merge_selecting_sleep_ms {#merge_selecting_sleep_ms}
|
||||
|
||||
Время ожидания для слияния выборки, если ни один кусок не выбран. Снижение времени ожидания приводит к частому выбору задач в пуле `background_schedule_pool` и увеличению количества запросов к Zookeeper в крупных кластерах.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: `5000`.
|
||||
|
||||
## parallel_distributed_insert_select {#parallel_distributed_insert_select}
|
||||
|
||||
Включает параллельную обработку распределённых запросов `INSERT ... SELECT`.
|
||||
|
@ -581,6 +581,7 @@ SOURCE(MYSQL(
|
||||
<db>default</db>
|
||||
<table>ids</table>
|
||||
<where>id=10</where>
|
||||
<secure>1</secure>
|
||||
</clickhouse>
|
||||
</source>
|
||||
```
|
||||
@ -596,7 +597,8 @@ SOURCE(CLICKHOUSE(
|
||||
db 'default'
|
||||
table 'ids'
|
||||
where 'id=10'
|
||||
))
|
||||
secure 1
|
||||
));
|
||||
```
|
||||
|
||||
Поля настройки:
|
||||
@ -609,6 +611,7 @@ SOURCE(CLICKHOUSE(
|
||||
- `table` — имя таблицы.
|
||||
- `where` — условие выбора. Может отсутствовать.
|
||||
- `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external-dicts-dict-lifetime.md).
|
||||
- `secure` - флаг, разрешающий или не разрешающий защищённое SSL-соединение.
|
||||
|
||||
### MongoDB {#dicts-external_dicts_dict_sources-mongodb}
|
||||
|
||||
@ -769,4 +772,3 @@ Setting fields:
|
||||
- `table` – Имя таблицы.
|
||||
- `where` – Условие выборки. Синтаксис для условий такой же как для `WHERE` выражения в PostgreSQL, для примера, `id > 10 AND id < 20`. Необязательный параметр.
|
||||
- `invalidate_query` – Запрос для проверки условия загрузки словаря. Необязательный параметр. Читайте больше в разделе [Обновление словарей](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
||||
|
@ -23,8 +23,8 @@ dictGetOrNull('dict_name', attr_name, id_expr)
|
||||
**Аргументы**
|
||||
|
||||
- `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
|
||||
- `attr_names` — имя столбца словаря, [Строковый литерал](../syntax.md#syntax-string-literal), или кортеж [Tuple](../../sql-reference/data-types/tuple.md) таких имен.
|
||||
- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql-reference/functions/ext-dict-functions.md) или [Tuple](../../sql-reference/functions/ext-dict-functions.md), в зависимости от конфигурации словаря.
|
||||
- `attr_names` — имя столбца словаря. [Строковый литерал](../syntax.md#syntax-string-literal), или кортеж [Tuple](../../sql-reference/data-types/tuple.md) таких имен.
|
||||
- `id_expr` — значение ключа словаря. [Expression](../../sql-reference/syntax.md#syntax-expressions) возвращает пару "ключ-значение" словаря или [Tuple](../../sql-reference/functions/ext-dict-functions.md), в зависимости от конфигурации словаря.
|
||||
- `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions), возвращающее значение с типом данных, сконфигурированным для атрибута `attr_names`, или кортеж [Tuple](../../sql-reference/data-types/tuple.md) таких выражений.
|
||||
|
||||
**Возвращаемое значение**
|
||||
@ -87,7 +87,7 @@ SELECT
|
||||
dictGetOrDefault('ext-dict-test', 'c1', number + 1, toUInt32(number * 10)) AS val,
|
||||
toTypeName(val) AS type
|
||||
FROM system.numbers
|
||||
LIMIT 3
|
||||
LIMIT 3;
|
||||
```
|
||||
|
||||
``` text
|
||||
@ -237,7 +237,7 @@ dictHas('dict_name', id)
|
||||
**Аргументы**
|
||||
|
||||
- `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
|
||||
- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql-reference/functions/ext-dict-functions.md) или [Tuple](../../sql-reference/functions/ext-dict-functions.md) в зависимости от конфигурации словаря.
|
||||
- `id_expr` — значение ключа словаря. [Expression](../../sql-reference/syntax.md#syntax-expressions) возвращает пару "ключ-значение" словаря или [Tuple](../../sql-reference/functions/ext-dict-functions.md) в зависимости от конфигурации словаря.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -337,7 +337,7 @@ SELECT dictGetChildren('hierarchy_flat_dictionary', number) FROM system.numbers
|
||||
|
||||
## dictGetDescendant {#dictgetdescendant}
|
||||
|
||||
Возвращает всех потомков, как если бы функция [dictGetChildren](#dictgetchildren) была выполнена `level` раз рекурсивно.
|
||||
Возвращает всех потомков, как если бы функция [dictGetChildren](#dictgetchildren) была выполнена `level` раз рекурсивно.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
|
@ -8,9 +8,9 @@ toc_title: "\u6982\u8FF0"
|
||||
|
||||
# ClickHouse指南 {#clickhouse-guides}
|
||||
|
||||
详细的一步一步的说明,帮助解决使用ClickHouse的各种任务列表:
|
||||
列出了如何使用 Clickhouse 解决各种任务的详细说明:
|
||||
|
||||
- [简单集群设置教程](../getting-started/tutorial.md)
|
||||
- [关于简单集群设置的教程](../getting-started/tutorial.md)
|
||||
- [在ClickHouse中应用CatBoost模型](apply-catboost-model.md)
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/guides/) <!--hide-->
|
||||
|
@ -462,7 +462,7 @@ SSL客户端/服务器配置。
|
||||
- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS.
|
||||
- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
|
||||
- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .
|
||||
|
@ -26,6 +26,9 @@
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <Poco/String.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <common/find_symbols.h>
|
||||
#include <common/LineReader.h>
|
||||
@ -55,8 +58,7 @@
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/UseSSL.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <DataStreams/InternalTextLogsRowOutputStream.h>
|
||||
#include <DataStreams/NullBlockOutputStream.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
@ -80,6 +82,7 @@
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Common/Config/configReadClient.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <common/argsToConfig.h>
|
||||
@ -1925,19 +1928,24 @@ private:
|
||||
current_format = insert->format;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr block_input = context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
|
||||
auto source = FormatFactory::instance().getInput(current_format, buf, sample, context, insert_format_max_block_size);
|
||||
Pipe pipe(source);
|
||||
|
||||
if (columns_description.hasDefaults())
|
||||
block_input = std::make_shared<AddingDefaultsBlockInputStream>(block_input, columns_description, context);
|
||||
|
||||
BlockInputStreamPtr async_block_input = std::make_shared<AsynchronousBlockInputStream>(block_input);
|
||||
|
||||
async_block_input->readPrefix();
|
||||
|
||||
while (true)
|
||||
{
|
||||
Block block = async_block_input->read();
|
||||
pipe.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, context);
|
||||
});
|
||||
}
|
||||
|
||||
QueryPipeline pipeline;
|
||||
pipeline.init(std::move(pipe));
|
||||
PullingAsyncPipelineExecutor executor(pipeline);
|
||||
|
||||
Block block;
|
||||
while (executor.pull(block))
|
||||
{
|
||||
/// Check if server send Log packet
|
||||
receiveLogs();
|
||||
|
||||
@ -1949,18 +1957,18 @@ private:
|
||||
* We're exiting with error, so it makes sense to kill the
|
||||
* input stream without waiting for it to complete.
|
||||
*/
|
||||
async_block_input->cancel(true);
|
||||
executor.cancel();
|
||||
return;
|
||||
}
|
||||
|
||||
connection->sendData(block);
|
||||
processed_rows += block.rows();
|
||||
|
||||
if (!block)
|
||||
break;
|
||||
if (block)
|
||||
{
|
||||
connection->sendData(block);
|
||||
processed_rows += block.rows();
|
||||
}
|
||||
}
|
||||
|
||||
async_block_input->readSuffix();
|
||||
connection->sendData({});
|
||||
}
|
||||
|
||||
|
||||
|
@ -1702,14 +1702,15 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
|
||||
LOG_INFO(log, "All helping tables dropped partition {}", partition_name);
|
||||
}
|
||||
|
||||
String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings)
|
||||
String ClusterCopier::getRemoteCreateTable(
|
||||
const DatabaseAndTableName & table, Connection & connection, const Settings & settings)
|
||||
{
|
||||
auto remote_context = Context::createCopy(context);
|
||||
remote_context->setSettings(settings);
|
||||
|
||||
String query = "SHOW CREATE TABLE " + getQuotedTable(table);
|
||||
Block block = getBlockWithAllStreamData(std::make_shared<RemoteBlockInputStream>(
|
||||
connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context));
|
||||
Block block = getBlockWithAllStreamData(
|
||||
std::make_shared<RemoteBlockInputStream>(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context));
|
||||
|
||||
return typeid_cast<const ColumnString &>(*block.safeGetByPosition(0).column).getDataAt(0).toString();
|
||||
}
|
||||
@ -1719,10 +1720,8 @@ ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & time
|
||||
{
|
||||
/// Fetch and parse (possibly) new definition
|
||||
auto connection_entry = task_shard.info.pool->get(timeouts, &task_cluster->settings_pull, true);
|
||||
String create_query_pull_str = getRemoteCreateTable(
|
||||
task_shard.task_table.table_pull,
|
||||
*connection_entry,
|
||||
task_cluster->settings_pull);
|
||||
String create_query_pull_str
|
||||
= getRemoteCreateTable(task_shard.task_table.table_pull, *connection_entry, task_cluster->settings_pull);
|
||||
|
||||
ParserCreateQuery parser_create_query;
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
@ -1953,8 +1952,8 @@ UInt64 ClusterCopier::executeQueryOnCluster(
|
||||
/// For unknown reason global context is passed to IStorage::read() method
|
||||
/// So, task_identifier is passed as constructor argument. It is more obvious.
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
*connections.back(), query, header, getContext(),
|
||||
/*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete);
|
||||
*connections.back(), query, header, getContext(),
|
||||
/*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete);
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -51,10 +51,14 @@ namespace
|
||||
void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
LOG_TRACE(log, "Ololo");
|
||||
try
|
||||
{
|
||||
HTMLForm params(getContext()->getSettingsRef(), request);
|
||||
|
||||
LOG_TRACE(log, "parsed params");
|
||||
if (!params.has("method"))
|
||||
{
|
||||
LOG_TRACE(log, "No 'method' in request URL");
|
||||
processError(response, "No 'method' in request URL");
|
||||
return;
|
||||
}
|
||||
@ -256,6 +260,13 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,6 +59,7 @@
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Storages/registerStorages.h>
|
||||
#include <DataStreams/ConnectionCollector.h>
|
||||
#include <Dictionaries/registerDictionaries.h>
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Common/Config/ConfigReloader.h>
|
||||
@ -503,6 +504,8 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
|
||||
GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 10000));
|
||||
|
||||
ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10));
|
||||
|
||||
bool has_zookeeper = config().has("zookeeper");
|
||||
|
||||
zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); });
|
||||
|
@ -45,6 +45,7 @@ SRCS(
|
||||
SettingsProfilesCache.cpp
|
||||
User.cpp
|
||||
UsersConfigAccessStorage.cpp
|
||||
tests/gtest_access_rights_ops.cpp
|
||||
|
||||
)
|
||||
|
||||
|
@ -95,18 +95,18 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
// nullability themselves. Another special case is functions from Nothing
|
||||
// that are rewritten to AggregateFunctionNothing, in this case
|
||||
// nested_function is nullptr.
|
||||
if (nested_function && nested_function->isOnlyWindowFunction())
|
||||
if (!nested_function || !nested_function->isOnlyWindowFunction())
|
||||
{
|
||||
return nested_function;
|
||||
return combinator->transformAggregateFunction(nested_function,
|
||||
out_properties, type_without_low_cardinality, parameters);
|
||||
}
|
||||
|
||||
return combinator->transformAggregateFunction(nested_function, out_properties, type_without_low_cardinality, parameters);
|
||||
}
|
||||
|
||||
auto res = getImpl(name, type_without_low_cardinality, parameters, out_properties, false);
|
||||
if (!res)
|
||||
auto with_original_arguments = getImpl(name, type_without_low_cardinality, parameters, out_properties, false);
|
||||
|
||||
if (!with_original_arguments)
|
||||
throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR);
|
||||
return res;
|
||||
return with_original_arguments;
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,6 +28,8 @@ HedgedConnections::HedgedConnections(
|
||||
std::shared_ptr<QualifiedTableName> table_to_check_)
|
||||
: hedged_connections_factory(pool_, &settings_, timeouts_, table_to_check_)
|
||||
, settings(settings_)
|
||||
, drain_timeout(settings.drain_timeout)
|
||||
, allow_changing_replica_until_first_data_packet(settings.allow_changing_replica_until_first_data_packet)
|
||||
, throttler(throttler_)
|
||||
{
|
||||
std::vector<Connection *> connections = hedged_connections_factory.getManyConnections(pool_mode);
|
||||
@ -251,7 +253,7 @@ Packet HedgedConnections::drain()
|
||||
|
||||
while (!epoll.empty())
|
||||
{
|
||||
ReplicaLocation location = getReadyReplicaLocation();
|
||||
ReplicaLocation location = getReadyReplicaLocation(DrainCallback{drain_timeout});
|
||||
Packet packet = receivePacketFromReplica(location);
|
||||
switch (packet.type)
|
||||
{
|
||||
@ -278,10 +280,10 @@ Packet HedgedConnections::drain()
|
||||
Packet HedgedConnections::receivePacket()
|
||||
{
|
||||
std::lock_guard lock(cancel_mutex);
|
||||
return receivePacketUnlocked({});
|
||||
return receivePacketUnlocked({}, false /* is_draining */);
|
||||
}
|
||||
|
||||
Packet HedgedConnections::receivePacketUnlocked(AsyncCallback async_callback)
|
||||
Packet HedgedConnections::receivePacketUnlocked(AsyncCallback async_callback, bool /* is_draining */)
|
||||
{
|
||||
if (!sent_query)
|
||||
throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR);
|
||||
@ -396,7 +398,7 @@ Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & repli
|
||||
{
|
||||
/// If we are allowed to change replica until the first data packet,
|
||||
/// just restart timeout (if it hasn't expired yet). Otherwise disable changing replica with this offset.
|
||||
if (settings.allow_changing_replica_until_first_data_packet && !replica.is_change_replica_timeout_expired)
|
||||
if (allow_changing_replica_until_first_data_packet && !replica.is_change_replica_timeout_expired)
|
||||
replica.change_replica_timeout.setRelative(hedged_connections_factory.getConnectionTimeouts().receive_data_timeout);
|
||||
else
|
||||
disableChangingReplica(replica_location);
|
||||
|
@ -97,7 +97,7 @@ public:
|
||||
|
||||
Packet receivePacket() override;
|
||||
|
||||
Packet receivePacketUnlocked(AsyncCallback async_callback) override;
|
||||
Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override;
|
||||
|
||||
void disconnect() override;
|
||||
|
||||
@ -189,6 +189,12 @@ private:
|
||||
|
||||
Epoll epoll;
|
||||
const Settings & settings;
|
||||
|
||||
/// The following two fields are from settings but can be referenced outside the lifetime of
|
||||
/// settings when connection is drained asynchronously.
|
||||
Poco::Timespan drain_timeout;
|
||||
bool allow_changing_replica_until_first_data_packet;
|
||||
|
||||
ThrottlerPtr throttler;
|
||||
bool sent_query = false;
|
||||
bool cancelled = false;
|
||||
|
31
src/Client/IConnections.cpp
Normal file
31
src/Client/IConnections.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
#include <Client/IConnections.h>
|
||||
#include <Poco/Net/SocketImpl.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SOCKET_TIMEOUT;
|
||||
}
|
||||
|
||||
/// This wrapper struct allows us to use Poco's socket polling code with a raw fd.
|
||||
/// The only difference from Poco::Net::SocketImpl is that we don't close the fd in the destructor.
|
||||
struct PocoSocketWrapper : public Poco::Net::SocketImpl
|
||||
{
|
||||
explicit PocoSocketWrapper(int fd)
|
||||
{
|
||||
reset(fd);
|
||||
}
|
||||
|
||||
// Do not close fd.
|
||||
~PocoSocketWrapper() override { reset(-1); }
|
||||
};
|
||||
|
||||
void IConnections::DrainCallback::operator()(int fd, Poco::Timespan, const std::string fd_description) const
|
||||
{
|
||||
if (!PocoSocketWrapper(fd).poll(drain_timeout, Poco::Net::Socket::SELECT_READ))
|
||||
throw Exception(ErrorCodes::SOCKET_TIMEOUT, "Read timeout while draining from {}", fd_description);
|
||||
}
|
||||
|
||||
}
|
@ -10,6 +10,12 @@ namespace DB
|
||||
class IConnections : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
struct DrainCallback
|
||||
{
|
||||
Poco::Timespan drain_timeout;
|
||||
void operator()(int fd, Poco::Timespan, const std::string fd_description = "") const;
|
||||
};
|
||||
|
||||
/// Send all scalars to replicas.
|
||||
virtual void sendScalarsData(Scalars & data) = 0;
|
||||
/// Send all content of external tables to replicas.
|
||||
@ -30,7 +36,7 @@ public:
|
||||
virtual Packet receivePacket() = 0;
|
||||
|
||||
/// Version of `receivePacket` function without locking.
|
||||
virtual Packet receivePacketUnlocked(AsyncCallback async_callback) = 0;
|
||||
virtual Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) = 0;
|
||||
|
||||
/// Break all active connections.
|
||||
virtual void disconnect() = 0;
|
||||
|
@ -18,7 +18,7 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_)
|
||||
: settings(settings_), drain_timeout(settings.drain_timeout), receive_timeout(settings.receive_timeout)
|
||||
{
|
||||
connection.setThrottler(throttler);
|
||||
|
||||
@ -30,9 +30,8 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se
|
||||
}
|
||||
|
||||
MultiplexedConnections::MultiplexedConnections(
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_)
|
||||
std::vector<IConnectionPool::Entry> && connections, const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_), drain_timeout(settings.drain_timeout), receive_timeout(settings.receive_timeout)
|
||||
{
|
||||
/// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that
|
||||
/// `skip_unavailable_shards` was set. Then just return.
|
||||
@ -168,7 +167,7 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response)
|
||||
Packet MultiplexedConnections::receivePacket()
|
||||
{
|
||||
std::lock_guard lock(cancel_mutex);
|
||||
Packet packet = receivePacketUnlocked({});
|
||||
Packet packet = receivePacketUnlocked({}, false /* is_draining */);
|
||||
return packet;
|
||||
}
|
||||
|
||||
@ -216,7 +215,7 @@ Packet MultiplexedConnections::drain()
|
||||
|
||||
while (hasActiveConnections())
|
||||
{
|
||||
Packet packet = receivePacketUnlocked({});
|
||||
Packet packet = receivePacketUnlocked(DrainCallback{drain_timeout}, true /* is_draining */);
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
@ -264,14 +263,14 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callback)
|
||||
Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callback, bool is_draining)
|
||||
{
|
||||
if (!sent_query)
|
||||
throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR);
|
||||
if (!hasActiveConnections())
|
||||
throw Exception("No more packets are available.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ReplicaState & state = getReplicaForReading();
|
||||
ReplicaState & state = getReplicaForReading(is_draining);
|
||||
current_connection = state.connection;
|
||||
if (current_connection == nullptr)
|
||||
throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA);
|
||||
@ -323,9 +322,10 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
return packet;
|
||||
}
|
||||
|
||||
MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForReading()
|
||||
MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForReading(bool is_draining)
|
||||
{
|
||||
if (replica_states.size() == 1)
|
||||
/// Fast path when we only focus on one replica and are not draining the connection.
|
||||
if (replica_states.size() == 1 && !is_draining)
|
||||
return replica_states[0];
|
||||
|
||||
Poco::Net::Socket::SocketList read_list;
|
||||
@ -353,10 +353,26 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
|
||||
read_list.push_back(*connection->socket);
|
||||
}
|
||||
|
||||
int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings.receive_timeout);
|
||||
int n = Poco::Net::Socket::select(
|
||||
read_list,
|
||||
write_list,
|
||||
except_list,
|
||||
is_draining ? drain_timeout : receive_timeout);
|
||||
|
||||
if (n == 0)
|
||||
throw Exception("Timeout exceeded while reading from " + dumpAddressesUnlocked(), ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
{
|
||||
auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked());
|
||||
for (ReplicaState & state : replica_states)
|
||||
{
|
||||
Connection * connection = state.connection;
|
||||
if (connection != nullptr)
|
||||
{
|
||||
connection->disconnect();
|
||||
invalidateReplica(state);
|
||||
}
|
||||
}
|
||||
throw Exception(err_msg, ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO Absolutely wrong code: read_list could be empty; motivation of rand is unclear.
|
||||
|
@ -61,7 +61,7 @@ public:
|
||||
bool hasActiveConnections() const override { return active_connection_count > 0; }
|
||||
|
||||
private:
|
||||
Packet receivePacketUnlocked(AsyncCallback async_callback) override;
|
||||
Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override;
|
||||
|
||||
/// Internal version of `dumpAddresses` function without locking.
|
||||
std::string dumpAddressesUnlocked() const;
|
||||
@ -74,7 +74,7 @@ private:
|
||||
};
|
||||
|
||||
/// Get a replica where you can read the data.
|
||||
ReplicaState & getReplicaForReading();
|
||||
ReplicaState & getReplicaForReading(bool is_draining);
|
||||
|
||||
/// Mark the replica as invalid.
|
||||
void invalidateReplica(ReplicaState & replica_state);
|
||||
@ -82,6 +82,11 @@ private:
|
||||
private:
|
||||
const Settings & settings;
|
||||
|
||||
/// The following two fields are from settings but can be referenced outside the lifetime of
|
||||
/// settings when connection is drained asynchronously.
|
||||
Poco::Timespan drain_timeout;
|
||||
Poco::Timespan receive_timeout;
|
||||
|
||||
/// The current number of valid connections to the replicas of this shard.
|
||||
size_t active_connection_count = 0;
|
||||
|
||||
|
@ -124,6 +124,10 @@ class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBas
|
||||
Mapped * value;
|
||||
|
||||
public:
|
||||
FindResultImpl()
|
||||
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0)
|
||||
{}
|
||||
|
||||
FindResultImpl(Mapped * value_, bool found_, size_t off)
|
||||
: FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
|
||||
Mapped & getMapped() const { return *value; }
|
||||
|
@ -71,6 +71,10 @@
|
||||
M(PartsInMemory, "In-memory parts.") \
|
||||
M(MMappedFiles, "Total number of mmapped files.") \
|
||||
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
|
||||
M(AsyncDrainedConnections, "Number of connections drained asynchronously.") \
|
||||
M(ActiveAsyncDrainedConnections, "Number of active connections drained asynchronously.") \
|
||||
M(SyncDrainedConnections, "Number of connections drained synchronously.") \
|
||||
M(ActiveSyncDrainedConnections, "Number of active connections drained synchronously.") \
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
|
@ -21,7 +21,7 @@ struct Settings;
|
||||
M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
|
||||
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
|
||||
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
|
||||
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
|
||||
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
|
||||
|
@ -11,6 +11,7 @@
|
||||
#define DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_SECURE_MS 100
|
||||
#define DBMS_DEFAULT_SEND_TIMEOUT_SEC 300
|
||||
#define DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC 300
|
||||
#define DBMS_DEFAULT_DRAIN_TIMEOUT_SEC 3
|
||||
/// Timeouts for hedged requests.
|
||||
#define DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS 100
|
||||
#define DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_MS 2000
|
||||
|
@ -54,6 +54,7 @@ class IColumn;
|
||||
M(Milliseconds, connect_timeout_with_failover_secure_ms, DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_SECURE_MS, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
|
||||
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \
|
||||
M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \
|
||||
M(Seconds, drain_timeout, DBMS_DEFAULT_DRAIN_TIMEOUT_SEC, "", 0) \
|
||||
M(Seconds, tcp_keep_alive_timeout, 0, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \
|
||||
M(Milliseconds, hedged_connection_timeout_ms, DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS, "Connection timeout for establishing connection with replica for Hedged requests", 0) \
|
||||
M(Milliseconds, receive_data_timeout_ms, DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_MS, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \
|
||||
@ -233,6 +234,8 @@ class IColumn;
|
||||
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
|
||||
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
|
||||
M(UInt64, unknown_packet_in_send_data, 0, "Send unknown packet instead of data Nth data packet", 0) \
|
||||
/** Settings for testing connection collector */ \
|
||||
M(Milliseconds, sleep_in_receive_cancel_ms, 0, "Time to sleep in receiving cancel in TCPHandler", 0) \
|
||||
\
|
||||
M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
|
||||
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
|
||||
|
@ -42,15 +42,15 @@ struct SortColumnDescription
|
||||
bool with_fill;
|
||||
FillColumnDescription fill_description;
|
||||
|
||||
SortColumnDescription(
|
||||
size_t column_number_, int direction_, int nulls_direction_,
|
||||
explicit SortColumnDescription(
|
||||
size_t column_number_, int direction_ = 1, int nulls_direction_ = 1,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
|
||||
, with_fill(with_fill_), fill_description(fill_description_) {}
|
||||
|
||||
SortColumnDescription(
|
||||
const std::string & column_name_, int direction_, int nulls_direction_,
|
||||
explicit SortColumnDescription(
|
||||
const std::string & column_name_, int direction_ = 1, int nulls_direction_ = 1,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
|
||||
|
115
src/DataStreams/ConnectionCollector.cpp
Normal file
115
src/DataStreams/ConnectionCollector.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
#include <DataStreams/ConnectionCollector.h>
|
||||
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric AsyncDrainedConnections;
|
||||
extern const Metric ActiveAsyncDrainedConnections;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_PACKET_FROM_SERVER;
|
||||
}
|
||||
|
||||
std::unique_ptr<ConnectionCollector> ConnectionCollector::connection_collector;
|
||||
|
||||
static constexpr UInt64 max_connection_draining_tasks_per_thread = 20;
|
||||
|
||||
ConnectionCollector::ConnectionCollector(ContextMutablePtr global_context_, size_t max_threads)
|
||||
: WithMutableContext(global_context_), pool(max_threads, max_threads, max_threads * max_connection_draining_tasks_per_thread)
|
||||
{
|
||||
}
|
||||
|
||||
ConnectionCollector & ConnectionCollector::init(ContextMutablePtr global_context_, size_t max_threads)
|
||||
{
|
||||
if (connection_collector)
|
||||
{
|
||||
throw Exception("Connection collector is initialized twice. This is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
connection_collector.reset(new ConnectionCollector(global_context_, max_threads));
|
||||
return *connection_collector;
|
||||
}
|
||||
|
||||
struct AsyncDrainTask
|
||||
{
|
||||
const ConnectionPoolWithFailoverPtr pool;
|
||||
std::shared_ptr<IConnections> shared_connections;
|
||||
void operator()() const
|
||||
{
|
||||
ConnectionCollector::drainConnections(*shared_connections);
|
||||
}
|
||||
|
||||
// We don't have std::unique_function yet. Wrap it in shared_ptr to make the functor copyable.
|
||||
std::shared_ptr<CurrentMetrics::Increment> metric_increment
|
||||
= std::make_shared<CurrentMetrics::Increment>(CurrentMetrics::ActiveAsyncDrainedConnections);
|
||||
};
|
||||
|
||||
std::shared_ptr<IConnections> ConnectionCollector::enqueueConnectionCleanup(
|
||||
const ConnectionPoolWithFailoverPtr & pool, std::shared_ptr<IConnections> connections) noexcept
|
||||
{
|
||||
if (!connections)
|
||||
return nullptr;
|
||||
|
||||
if (connection_collector)
|
||||
{
|
||||
if (connection_collector->pool.trySchedule(AsyncDrainTask{pool, connections}))
|
||||
{
|
||||
CurrentMetrics::add(CurrentMetrics::AsyncDrainedConnections, 1);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return connections;
|
||||
}
|
||||
|
||||
void ConnectionCollector::drainConnections(IConnections & connections) noexcept
|
||||
{
|
||||
bool is_drained = false;
|
||||
try
|
||||
{
|
||||
Packet packet = connections.drain();
|
||||
is_drained = true;
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::EndOfStream:
|
||||
case Protocol::Server::Log:
|
||||
break;
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
packet.exception->rethrow();
|
||||
break;
|
||||
|
||||
default:
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_PACKET_FROM_SERVER,
|
||||
"Unknown packet {} from one of the following replicas: {}",
|
||||
toString(packet.type),
|
||||
connections.dumpAddresses());
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(&Poco::Logger::get("ConnectionCollector"), __PRETTY_FUNCTION__);
|
||||
if (!is_drained)
|
||||
{
|
||||
try
|
||||
{
|
||||
connections.disconnect();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(&Poco::Logger::get("ConnectionCollector"), __PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
30
src/DataStreams/ConnectionCollector.h
Normal file
30
src/DataStreams/ConnectionCollector.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Client/IConnections.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ConnectionPoolWithFailover;
|
||||
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;
|
||||
|
||||
class ConnectionCollector : boost::noncopyable, WithMutableContext
|
||||
{
|
||||
public:
|
||||
static ConnectionCollector & init(ContextMutablePtr global_context_, size_t max_threads);
|
||||
static std::shared_ptr<IConnections>
|
||||
enqueueConnectionCleanup(const ConnectionPoolWithFailoverPtr & pool, std::shared_ptr<IConnections> connections) noexcept;
|
||||
static void drainConnections(IConnections & connections) noexcept;
|
||||
|
||||
private:
|
||||
explicit ConnectionCollector(ContextMutablePtr global_context_, size_t max_threads);
|
||||
|
||||
static constexpr size_t reschedule_time_ms = 1000;
|
||||
ThreadPool pool;
|
||||
static std::unique_ptr<ConnectionCollector> connection_collector;
|
||||
};
|
||||
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct BlockIO;
|
||||
class Context;
|
||||
struct StorageInMemoryMetadata;
|
||||
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
|
||||
|
||||
/** Prepares an input stream which produce data containing in INSERT query
|
||||
* Head of inserting data could be stored in INSERT ast directly
|
||||
* Remaining (tail) data could be stored in input_buffer_tail_part
|
||||
*/
|
||||
class InputStreamFromASTInsertQuery : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
InputStreamFromASTInsertQuery(
|
||||
const ASTPtr & ast,
|
||||
ReadBuffer * input_buffer_tail_part,
|
||||
const Block & header,
|
||||
ContextPtr context,
|
||||
const ASTPtr & input_function);
|
||||
|
||||
Block readImpl() override { return res_stream->read(); }
|
||||
void readPrefixImpl() override { return res_stream->readPrefix(); }
|
||||
void readSuffixImpl() override { return res_stream->readSuffix(); }
|
||||
|
||||
String getName() const override { return "InputStreamFromASTInsertQuery"; }
|
||||
|
||||
Block getHeader() const override { return res_stream->getHeader(); }
|
||||
|
||||
private:
|
||||
std::unique_ptr<ReadBuffer> input_buffer_ast_part;
|
||||
std::unique_ptr<ReadBuffer> input_buffer_contacenated;
|
||||
|
||||
BlockInputStreamPtr res_stream;
|
||||
};
|
||||
|
||||
}
|
@ -5,27 +5,28 @@ namespace DB
|
||||
{
|
||||
|
||||
RemoteBlockInputStream::RemoteBlockInputStream(
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
: query_executor(connection, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
RemoteBlockInputStream::RemoteBlockInputStream(
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
: query_executor(std::move(connections), query_, header_, context_, throttler, scalars_, external_tables_, stage_)
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
: query_executor(pool, std::move(connections), query_, header_, context_, throttler, scalars_, external_tables_, stage_)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
RemoteBlockInputStream::RemoteBlockInputStream(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
|
||||
: query_executor(pool, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
|
||||
{
|
||||
init();
|
||||
@ -38,11 +39,6 @@ void RemoteBlockInputStream::init()
|
||||
query_executor.setLogger(log);
|
||||
}
|
||||
|
||||
void RemoteBlockInputStream::readPrefix()
|
||||
{
|
||||
query_executor.sendQuery();
|
||||
}
|
||||
|
||||
void RemoteBlockInputStream::cancel(bool kill)
|
||||
{
|
||||
if (kill)
|
||||
|
@ -24,24 +24,25 @@ class RemoteBlockInputStream : public IBlockInputStream
|
||||
public:
|
||||
/// Takes already set connection.
|
||||
RemoteBlockInputStream(
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
|
||||
/// Accepts several connections already taken from pool.
|
||||
RemoteBlockInputStream(
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
|
||||
/// Takes a pool and gets one or several connections from it.
|
||||
RemoteBlockInputStream(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
|
||||
/// Set the query_id. For now, used by performance test to later find the query
|
||||
/// in the server query_log. Must be called before sending the query to the server.
|
||||
@ -52,9 +53,6 @@ public:
|
||||
|
||||
void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); }
|
||||
|
||||
/// Sends query (initiates calculation) before read()
|
||||
void readPrefix() override;
|
||||
|
||||
/// Prevent default progress notification because progress' callback is called by its own.
|
||||
void progress(const Progress & /*value*/) override {}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <DataStreams/ConnectionCollector.h>
|
||||
#include <DataStreams/RemoteQueryExecutor.h>
|
||||
#include <DataStreams/RemoteQueryExecutorReadContext.h>
|
||||
|
||||
@ -17,6 +18,12 @@
|
||||
#include <Client/HedgedConnections.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric SyncDrainedConnections;
|
||||
extern const Metric ActiveSyncDrainedConnections;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -33,36 +40,37 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), sync_draining(true)
|
||||
{
|
||||
create_connections = [this, &connection, throttler]()
|
||||
{
|
||||
return std::make_unique<MultiplexedConnections>(connection, context->getSettingsRef(), throttler);
|
||||
return std::make_shared<MultiplexedConnections>(connection, context->getSettingsRef(), throttler);
|
||||
};
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
const ConnectionPoolWithFailoverPtr & pool_,
|
||||
std::vector<IConnectionPool::Entry> && connections_,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
|
||||
{
|
||||
create_connections = [this, connections_, throttler]() mutable {
|
||||
return std::make_unique<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
|
||||
return std::make_shared<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
|
||||
};
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const ConnectionPoolWithFailoverPtr & pool_,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
|
||||
{
|
||||
create_connections = [this, pool, throttler]()->std::unique_ptr<IConnections>
|
||||
create_connections = [this, throttler]()->std::shared_ptr<IConnections>
|
||||
{
|
||||
const Settings & current_settings = context->getSettingsRef();
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
|
||||
@ -74,7 +82,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
if (main_table)
|
||||
table_to_check = std::make_shared<QualifiedTableName>(main_table.getQualifiedName());
|
||||
|
||||
return std::make_unique<HedgedConnections>(pool, current_settings, timeouts, throttler, pool_mode, table_to_check);
|
||||
return std::make_shared<HedgedConnections>(pool, current_settings, timeouts, throttler, pool_mode, table_to_check);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -89,7 +97,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
else
|
||||
connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode);
|
||||
|
||||
return std::make_unique<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
|
||||
return std::make_shared<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
|
||||
};
|
||||
}
|
||||
|
||||
@ -406,32 +414,18 @@ void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
|
||||
|
||||
/// Send the request to abort the execution of the request, if not already sent.
|
||||
tryCancel("Cancelling query because enough data has been read", read_context);
|
||||
|
||||
/// Get the remaining packets so that there is no out of sync in the connections to the replicas.
|
||||
Packet packet = connections->drain();
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::EndOfStream:
|
||||
finished = true;
|
||||
break;
|
||||
|
||||
case Protocol::Server::Log:
|
||||
/// Pass logs from remote server to client
|
||||
if (auto log_queue = CurrentThread::getInternalTextLogsQueue())
|
||||
log_queue->pushBlock(std::move(packet.block));
|
||||
break;
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
got_exception_from_replica = true;
|
||||
packet.exception->rethrow();
|
||||
break;
|
||||
|
||||
default:
|
||||
got_unknown_packet_from_replica = true;
|
||||
throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}",
|
||||
toString(packet.type),
|
||||
connections->dumpAddresses());
|
||||
/// Finish might be called in multiple threads. Make sure we release connections in thread-safe way.
|
||||
std::lock_guard guard(connection_draining_mutex);
|
||||
if (auto conn = ConnectionCollector::enqueueConnectionCleanup(pool, connections))
|
||||
{
|
||||
/// Drain connections synchronously.
|
||||
CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections);
|
||||
ConnectionCollector::drainConnections(*conn);
|
||||
CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1);
|
||||
}
|
||||
}
|
||||
finished = true;
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::cancel(std::unique_ptr<ReadContext> * read_context)
|
||||
@ -506,20 +500,18 @@ void RemoteQueryExecutor::sendExternalTables()
|
||||
|
||||
void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context)
|
||||
{
|
||||
{
|
||||
/// Flag was_cancelled is atomic because it is checked in read().
|
||||
std::lock_guard guard(was_cancelled_mutex);
|
||||
/// Flag was_cancelled is atomic because it is checked in read().
|
||||
std::lock_guard guard(was_cancelled_mutex);
|
||||
|
||||
if (was_cancelled)
|
||||
return;
|
||||
if (was_cancelled)
|
||||
return;
|
||||
|
||||
was_cancelled = true;
|
||||
was_cancelled = true;
|
||||
|
||||
if (read_context && *read_context)
|
||||
(*read_context)->cancel();
|
||||
if (read_context && *read_context)
|
||||
(*read_context)->cancel();
|
||||
|
||||
connections->sendCancel();
|
||||
}
|
||||
connections->sendCancel();
|
||||
|
||||
if (log)
|
||||
LOG_TRACE(log, "({}) {}", connections->dumpAddresses(), reason);
|
||||
|
@ -36,6 +36,7 @@ public:
|
||||
using ReadContext = RemoteQueryExecutorReadContext;
|
||||
|
||||
/// Takes already set connection.
|
||||
/// We don't own connection, thus we have to drain it synchronously.
|
||||
RemoteQueryExecutor(
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
@ -44,6 +45,7 @@ public:
|
||||
|
||||
/// Accepts several connections already taken from pool.
|
||||
RemoteQueryExecutor(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
std::vector<IConnectionPool::Entry> && connections_,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
@ -107,9 +109,6 @@ private:
|
||||
Block totals;
|
||||
Block extremes;
|
||||
|
||||
std::function<std::unique_ptr<IConnections>()> create_connections;
|
||||
std::unique_ptr<IConnections> connections;
|
||||
|
||||
const String query;
|
||||
String query_id;
|
||||
ContextPtr context;
|
||||
@ -125,6 +124,15 @@ private:
|
||||
/// Initiator identifier for distributed task processing
|
||||
std::shared_ptr<TaskIterator> task_iterator;
|
||||
|
||||
/// Drain connection synchronously when finishing.
|
||||
bool sync_draining = false;
|
||||
|
||||
std::function<std::shared_ptr<IConnections>()> create_connections;
|
||||
/// Hold a shared reference to the connection pool so that asynchronous connection draining will
|
||||
/// work safely. Make sure it's the first member so that we don't destruct it too early.
|
||||
const ConnectionPoolWithFailoverPtr pool;
|
||||
std::shared_ptr<IConnections> connections;
|
||||
|
||||
/// Streams for reading from temporary tables and following sending of data
|
||||
/// to remote servers for GLOBAL-subqueries
|
||||
std::vector<ExternalTablesData> external_tables_data;
|
||||
@ -151,6 +159,10 @@ private:
|
||||
std::atomic<bool> was_cancelled { false };
|
||||
std::mutex was_cancelled_mutex;
|
||||
|
||||
/** Thread-safe connection draining.
|
||||
*/
|
||||
std::mutex connection_draining_mutex;
|
||||
|
||||
/** An exception from replica was received. No need in receiving more packets or
|
||||
* requesting to cancel query execution
|
||||
*/
|
||||
|
@ -43,7 +43,7 @@ struct RemoteQueryExecutorRoutine
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink});
|
||||
read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink}, false /* is_draining */);
|
||||
sink = std::move(sink).resume();
|
||||
}
|
||||
}
|
||||
@ -144,7 +144,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking)
|
||||
|
||||
if (is_timer_alarmed && !is_socket_ready)
|
||||
{
|
||||
/// Socket receive timeout. Drain it in case or error, or it may be hide by timeout exception.
|
||||
/// Socket receive timeout. Drain it in case of error, or it may be hide by timeout exception.
|
||||
timer.drain();
|
||||
throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT);
|
||||
}
|
||||
|
@ -14,7 +14,6 @@ NO_COMPILER_WARNINGS()
|
||||
|
||||
SRCS(
|
||||
AddingDefaultBlockOutputStream.cpp
|
||||
AddingDefaultsBlockInputStream.cpp
|
||||
AsynchronousBlockInputStream.cpp
|
||||
BlockIO.cpp
|
||||
BlockStreamProfileInfo.cpp
|
||||
@ -28,7 +27,6 @@ SRCS(
|
||||
ExpressionBlockInputStream.cpp
|
||||
IBlockInputStream.cpp
|
||||
ITTLAlgorithm.cpp
|
||||
InputStreamFromASTInsertQuery.cpp
|
||||
InternalTextLogsRowOutputStream.cpp
|
||||
LimitBlockInputStream.cpp
|
||||
MaterializingBlockInputStream.cpp
|
||||
|
44
src/Functions/initialQueryID.cpp
Normal file
44
src/Functions/initialQueryID.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class FunctionInitialQueryID : public IFunction
|
||||
{
|
||||
const String initial_query_id;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "initialQueryID";
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionInitialQueryID>(context->getClientInfo().initial_query_id);
|
||||
}
|
||||
|
||||
explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {}
|
||||
|
||||
inline String getName() const override { return name; }
|
||||
|
||||
inline size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
inline bool isDeterministic() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
return DataTypeString().createColumnConst(input_rows_count, initial_query_id);
|
||||
}
|
||||
};
|
||||
|
||||
void registerFunctionInitialQueryID(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionInitialQueryID>();
|
||||
factory.registerAlias("initial_query_id", FunctionInitialQueryID::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
}
|
44
src/Functions/queryID.cpp
Normal file
44
src/Functions/queryID.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class FunctionQueryID : public IFunction
|
||||
{
|
||||
const String query_id;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "queryID";
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionQueryID>(context->getClientInfo().current_query_id);
|
||||
}
|
||||
|
||||
explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {}
|
||||
|
||||
inline String getName() const override { return name; }
|
||||
|
||||
inline size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
inline bool isDeterministic() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
return DataTypeString().createColumnConst(input_rows_count, query_id)->convertToFullColumnIfConst();
|
||||
}
|
||||
};
|
||||
|
||||
void registerFunctionQueryID(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionQueryID>();
|
||||
factory.registerAlias("query_id", FunctionQueryID::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
}
|
@ -74,6 +74,8 @@ void registerFunctionFile(FunctionFactory & factory);
|
||||
void registerFunctionConnectionId(FunctionFactory & factory);
|
||||
void registerFunctionPartitionId(FunctionFactory & factory);
|
||||
void registerFunctionIsIPAddressContainedIn(FunctionFactory &);
|
||||
void registerFunctionQueryID(FunctionFactory & factory);
|
||||
void registerFunctionInitialQueryID(FunctionFactory & factory);
|
||||
|
||||
#if USE_ICU
|
||||
void registerFunctionConvertCharset(FunctionFactory &);
|
||||
@ -148,6 +150,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionConnectionId(factory);
|
||||
registerFunctionPartitionId(factory);
|
||||
registerFunctionIsIPAddressContainedIn(factory);
|
||||
registerFunctionQueryID(factory);
|
||||
registerFunctionInitialQueryID(factory);
|
||||
|
||||
#if USE_ICU
|
||||
registerFunctionConvertCharset(factory);
|
||||
|
@ -12,48 +12,77 @@ namespace ErrorCodes
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION;
|
||||
extern const int AMBIGUOUS_COLUMN_NAME;
|
||||
extern const int SYNTAX_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
|
||||
const std::pair<size_t, size_t> & table_no)
|
||||
namespace
|
||||
{
|
||||
|
||||
bool isLeftIdentifier(JoinIdentifierPos pos)
|
||||
{
|
||||
/// Unknown identifiers considered as left, we will try to process it on later stages
|
||||
/// Usually such identifiers came from `ARRAY JOIN ... AS ...`
|
||||
return pos == JoinIdentifierPos::Left || pos == JoinIdentifierPos::Unknown;
|
||||
}
|
||||
|
||||
bool isRightIdentifier(JoinIdentifierPos pos)
|
||||
{
|
||||
return pos == JoinIdentifierPos::Right;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos)
|
||||
{
|
||||
ASTPtr left = left_ast->clone();
|
||||
ASTPtr right = right_ast->clone();
|
||||
|
||||
if (table_no.first == 1 || table_no.second == 2)
|
||||
if (isLeftIdentifier(table_pos.first) && isRightIdentifier(table_pos.second))
|
||||
analyzed_join.addOnKeys(left, right);
|
||||
else if (table_no.first == 2 || table_no.second == 1)
|
||||
else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second))
|
||||
analyzed_join.addOnKeys(right, left);
|
||||
else
|
||||
throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.",
|
||||
ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
||||
has_some = true;
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
|
||||
const std::pair<size_t, size_t> & table_no, const ASOF::Inequality & inequality)
|
||||
JoinIdentifierPosPair table_pos, const ASOF::Inequality & inequality)
|
||||
{
|
||||
if (table_no.first == 1 || table_no.second == 2)
|
||||
if (isLeftIdentifier(table_pos.first) && isRightIdentifier(table_pos.second))
|
||||
{
|
||||
asof_left_key = left_ast->clone();
|
||||
asof_right_key = right_ast->clone();
|
||||
analyzed_join.setAsofInequality(inequality);
|
||||
}
|
||||
else if (table_no.first == 2 || table_no.second == 1)
|
||||
else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second))
|
||||
{
|
||||
asof_left_key = right_ast->clone();
|
||||
asof_right_key = left_ast->clone();
|
||||
analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
"Expressions {} and {} are from the same table but from different arguments of equal function in ASOF JOIN",
|
||||
queryToString(left_ast), queryToString(right_ast));
|
||||
}
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
|
||||
{
|
||||
if (!asof_left_key || !asof_right_key)
|
||||
throw Exception("No inequality in ASOF JOIN ON section.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
addJoinKeys(asof_left_key, asof_right_key, {1, 2});
|
||||
addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right});
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data)
|
||||
{
|
||||
if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown)
|
||||
data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table));
|
||||
else
|
||||
throw Exception("Unexpected identifier '" + ident.name() + "' in JOIN ON section",
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data)
|
||||
@ -61,9 +90,6 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
|
||||
if (func.name == "and")
|
||||
return; /// go into children
|
||||
|
||||
if (func.name == "or")
|
||||
throw Exception("JOIN ON does not support OR. Unexpected '" + queryToString(ast) + "'", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
ASOF::Inequality inequality = ASOF::getInequality(func.name);
|
||||
if (func.name == "equals" || inequality != ASOF::Inequality::None)
|
||||
{
|
||||
@ -71,32 +97,50 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
|
||||
throw Exception("Function " + func.name + " takes two arguments, got '" + func.formatForErrorMessage() + "' instead",
|
||||
ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
else
|
||||
throw Exception("Expected equality or inequality, got '" + queryToString(ast) + "'", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
|
||||
if (func.name == "equals")
|
||||
{
|
||||
ASTPtr left = func.arguments->children.at(0);
|
||||
ASTPtr right = func.arguments->children.at(1);
|
||||
auto table_numbers = getTableNumbers(ast, left, right, data);
|
||||
data.addJoinKeys(left, right, table_numbers);
|
||||
}
|
||||
else if (inequality != ASOF::Inequality::None)
|
||||
{
|
||||
if (!data.is_asof)
|
||||
throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
auto table_numbers = getTableNumbers(left, right, data);
|
||||
if (table_numbers.first == table_numbers.second)
|
||||
{
|
||||
if (table_numbers.first == JoinIdentifierPos::Unknown)
|
||||
throw Exception("Ambiguous column in expression '" + queryToString(ast) + "' in JOIN ON section",
|
||||
ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
||||
data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(table_numbers.first));
|
||||
return;
|
||||
}
|
||||
|
||||
if (table_numbers.first != JoinIdentifierPos::NotApplicable && table_numbers.second != JoinIdentifierPos::NotApplicable)
|
||||
{
|
||||
data.addJoinKeys(left, right, table_numbers);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown)
|
||||
{
|
||||
data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table));
|
||||
return;
|
||||
}
|
||||
|
||||
if (data.is_asof && inequality != ASOF::Inequality::None)
|
||||
{
|
||||
if (data.asof_left_key || data.asof_right_key)
|
||||
throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
|
||||
ASTPtr left = func.arguments->children.at(0);
|
||||
ASTPtr right = func.arguments->children.at(1);
|
||||
auto table_numbers = getTableNumbers(ast, left, right, data);
|
||||
auto table_numbers = getTableNumbers(left, right, data);
|
||||
|
||||
data.addAsofJoinKeys(left, right, table_numbers, inequality);
|
||||
return;
|
||||
}
|
||||
|
||||
throw Exception("Unsupported JOIN ON conditions. Unexpected '" + queryToString(ast) + "'",
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
|
||||
void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out)
|
||||
@ -118,32 +162,10 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<co
|
||||
getIdentifiers(child, out);
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast,
|
||||
Data & data)
|
||||
JoinIdentifierPosPair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data)
|
||||
{
|
||||
std::vector<const ASTIdentifier *> left_identifiers;
|
||||
std::vector<const ASTIdentifier *> right_identifiers;
|
||||
|
||||
getIdentifiers(left_ast, left_identifiers);
|
||||
getIdentifiers(right_ast, right_identifiers);
|
||||
|
||||
if (left_identifiers.empty() || right_identifiers.empty())
|
||||
{
|
||||
throw Exception("Not equi-join ON expression: " + queryToString(expr) + ". No columns in one of equality side.",
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
|
||||
size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
|
||||
size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
|
||||
|
||||
if (left_idents_table && left_idents_table == right_idents_table)
|
||||
{
|
||||
auto left_name = queryToString(*left_identifiers[0]);
|
||||
auto right_name = queryToString(*right_identifiers[0]);
|
||||
|
||||
throw Exception("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
|
||||
+ " are from the same table but from different arguments of equal function", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
auto left_idents_table = getTableForIdentifiers(left_ast, true, data);
|
||||
auto right_idents_table = getTableForIdentifiers(right_ast, true, data);
|
||||
|
||||
return std::make_pair(left_idents_table, right_idents_table);
|
||||
}
|
||||
@ -173,11 +195,16 @@ const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifie
|
||||
return identifier;
|
||||
}
|
||||
|
||||
/// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix.
|
||||
/// @returns Left or right table identifiers belongs to.
|
||||
/// Place detected identifier into identifiers[0] if any.
|
||||
size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data)
|
||||
JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr & ast, bool throw_on_table_mix, const Data & data)
|
||||
{
|
||||
size_t table_number = 0;
|
||||
std::vector<const ASTIdentifier *> identifiers;
|
||||
getIdentifiers(ast, identifiers);
|
||||
if (identifiers.empty())
|
||||
return JoinIdentifierPos::NotApplicable;
|
||||
|
||||
JoinIdentifierPos table_number = JoinIdentifierPos::Unknown;
|
||||
|
||||
for (auto & ident : identifiers)
|
||||
{
|
||||
@ -187,10 +214,20 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
|
||||
|
||||
/// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor.
|
||||
/// In this case it saves membership in IdentifierSemantic.
|
||||
auto opt = IdentifierSemantic::getMembership(*identifier);
|
||||
size_t membership = opt ? (*opt + 1) : 0;
|
||||
JoinIdentifierPos membership = JoinIdentifierPos::Unknown;
|
||||
if (auto opt = IdentifierSemantic::getMembership(*identifier); opt.has_value())
|
||||
{
|
||||
if (*opt == 0)
|
||||
membership = JoinIdentifierPos::Left;
|
||||
else if (*opt == 1)
|
||||
membership = JoinIdentifierPos::Right;
|
||||
else
|
||||
throw DB::Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME,
|
||||
"Position of identifier {} can't be deteminated.",
|
||||
identifier->name());
|
||||
}
|
||||
|
||||
if (!membership)
|
||||
if (membership == JoinIdentifierPos::Unknown)
|
||||
{
|
||||
const String & name = identifier->name();
|
||||
bool in_left_table = data.left_table.hasColumn(name);
|
||||
@ -211,22 +248,24 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
|
||||
}
|
||||
|
||||
if (in_left_table)
|
||||
membership = 1;
|
||||
membership = JoinIdentifierPos::Left;
|
||||
if (in_right_table)
|
||||
membership = 2;
|
||||
membership = JoinIdentifierPos::Right;
|
||||
}
|
||||
|
||||
if (membership && table_number == 0)
|
||||
if (membership != JoinIdentifierPos::Unknown && table_number == JoinIdentifierPos::Unknown)
|
||||
{
|
||||
table_number = membership;
|
||||
std::swap(ident, identifiers[0]); /// move first detected identifier to the first position
|
||||
}
|
||||
|
||||
if (membership && membership != table_number)
|
||||
if (membership != JoinIdentifierPos::Unknown && membership != table_number)
|
||||
{
|
||||
throw Exception("Invalid columns in JOIN ON section. Columns "
|
||||
+ identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName()
|
||||
+ " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
if (throw_on_table_mix)
|
||||
throw Exception("Invalid columns in JOIN ON section. Columns "
|
||||
+ identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName()
|
||||
+ " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
return JoinIdentifierPos::Unknown;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,21 @@ namespace ASOF
|
||||
enum class Inequality;
|
||||
}
|
||||
|
||||
enum class JoinIdentifierPos
|
||||
{
|
||||
/// Position can't be established, identifier not resolved
|
||||
Unknown,
|
||||
/// Left side of JOIN
|
||||
Left,
|
||||
/// Right side of JOIN
|
||||
Right,
|
||||
/// Expression not valid, e.g. doesn't contain identifiers
|
||||
NotApplicable,
|
||||
};
|
||||
|
||||
using JoinIdentifierPosPair = std::pair<JoinIdentifierPos, JoinIdentifierPos>;
|
||||
|
||||
|
||||
class CollectJoinOnKeysMatcher
|
||||
{
|
||||
public:
|
||||
@ -32,10 +47,9 @@ public:
|
||||
const bool is_asof{false};
|
||||
ASTPtr asof_left_key{};
|
||||
ASTPtr asof_right_key{};
|
||||
bool has_some{false};
|
||||
|
||||
void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no);
|
||||
void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no,
|
||||
void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos);
|
||||
void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos,
|
||||
const ASOF::Inequality & asof_inequality);
|
||||
void asofToJoinKeys();
|
||||
};
|
||||
@ -43,7 +57,17 @@ public:
|
||||
static void visit(const ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * func = ast->as<ASTFunction>())
|
||||
{
|
||||
visit(*func, ast, data);
|
||||
}
|
||||
else if (auto * ident = ast->as<ASTIdentifier>())
|
||||
{
|
||||
visit(*ident, ast, data);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// visit children
|
||||
}
|
||||
}
|
||||
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
|
||||
@ -55,11 +79,12 @@ public:
|
||||
|
||||
private:
|
||||
static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data);
|
||||
static void visit(const ASTIdentifier & ident, const ASTPtr & ast, Data & data);
|
||||
|
||||
static void getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out);
|
||||
static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
|
||||
static JoinIdentifierPosPair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
|
||||
static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
|
||||
static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
|
||||
static JoinIdentifierPos getTableForIdentifiers(const ASTPtr & ast, bool throw_on_table_mix, const Data & data);
|
||||
};
|
||||
|
||||
/// Parse JOIN ON expression and collect ASTs for joined columns.
|
||||
|
@ -812,6 +812,9 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_
|
||||
for (const auto & name : analyzed_join->keyNamesLeft())
|
||||
required_names.emplace(name);
|
||||
|
||||
if (ASTPtr extra_condition_column = analyzed_join->joinConditionColumn(JoinTableSide::Left))
|
||||
required_names.emplace(extra_condition_column->getColumnName());
|
||||
|
||||
for (const auto & column : required_columns)
|
||||
{
|
||||
if (required_names.count(column.name) != 0)
|
||||
|
@ -190,9 +190,12 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
|
||||
{
|
||||
LOG_DEBUG(log, "Right sample block: {}", right_sample_block.dumpStructure());
|
||||
|
||||
table_join->splitAdditionalColumns(right_sample_block, right_table_keys, sample_block_with_columns_to_add);
|
||||
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, sample_block_with_columns_to_add);
|
||||
|
||||
required_right_keys = table_join->getRequiredRightKeys(right_table_keys, required_right_keys_sources);
|
||||
|
||||
std::tie(condition_mask_column_name_left, condition_mask_column_name_right) = table_join->joinConditionColumnNames();
|
||||
|
||||
JoinCommon::removeLowCardinalityInplace(right_table_keys);
|
||||
initRightBlockStructure(data->sample_block);
|
||||
|
||||
@ -500,7 +503,7 @@ namespace
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
|
||||
size_t NO_INLINE insertFromBlockImplTypeCase(
|
||||
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
{
|
||||
[[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, RowRef>;
|
||||
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
|
||||
@ -516,6 +519,10 @@ namespace
|
||||
if (has_null_map && (*null_map)[i])
|
||||
continue;
|
||||
|
||||
/// Check condition for right table from ON section
|
||||
if (join_mask && !(*join_mask)[i])
|
||||
continue;
|
||||
|
||||
if constexpr (is_asof_join)
|
||||
Inserter<Map, KeyGetter>::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column);
|
||||
else if constexpr (mapped_one)
|
||||
@ -530,19 +537,21 @@ namespace
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
|
||||
size_t insertFromBlockImplType(
|
||||
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
{
|
||||
if (null_map)
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
|
||||
else
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
|
||||
}
|
||||
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
size_t insertFromBlockImpl(
|
||||
HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@ -553,7 +562,7 @@ namespace
|
||||
#define M(TYPE) \
|
||||
case HashJoin::Type::TYPE: \
|
||||
return insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
|
||||
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \
|
||||
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); \
|
||||
break;
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
@ -624,10 +633,34 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
|
||||
UInt8 save_nullmap = 0;
|
||||
if (isRightOrFull(kind) && null_map)
|
||||
{
|
||||
/// Save rows with NULL keys
|
||||
for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i)
|
||||
save_nullmap |= (*null_map)[i];
|
||||
}
|
||||
|
||||
auto join_mask_col = JoinCommon::getColumnAsMask(block, condition_mask_column_name_right);
|
||||
|
||||
/// Save blocks that do not hold conditions in ON section
|
||||
ColumnUInt8::MutablePtr not_joined_map = nullptr;
|
||||
if (isRightOrFull(kind) && join_mask_col)
|
||||
{
|
||||
const auto & join_mask = assert_cast<const ColumnUInt8 &>(*join_mask_col).getData();
|
||||
/// Save rows that do not hold conditions
|
||||
not_joined_map = ColumnUInt8::create(block.rows(), 0);
|
||||
for (size_t i = 0, sz = join_mask.size(); i < sz; ++i)
|
||||
{
|
||||
/// Condition hold, do not save row
|
||||
if (join_mask[i])
|
||||
continue;
|
||||
|
||||
/// NULL key will be saved anyway because, do not save twice
|
||||
if (save_nullmap && (*null_map)[i])
|
||||
continue;
|
||||
|
||||
not_joined_map->getData()[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
Block structured_block = structureRightBlock(block);
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes = 0;
|
||||
@ -647,7 +680,10 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
|
||||
{
|
||||
joinDispatch(kind, strictness, data->maps, [&](auto kind_, auto strictness_, auto & map)
|
||||
{
|
||||
size_t size = insertFromBlockImpl<strictness_>(*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map, data->pool);
|
||||
size_t size = insertFromBlockImpl<strictness_>(
|
||||
*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map,
|
||||
join_mask_col ? &assert_cast<const ColumnUInt8 &>(*join_mask_col).getData() : nullptr,
|
||||
data->pool);
|
||||
/// Number of buckets + 1 value from zero storage
|
||||
used_flags.reinit<kind_, strictness_>(size + 1);
|
||||
});
|
||||
@ -656,6 +692,9 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
|
||||
if (save_nullmap)
|
||||
data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
|
||||
|
||||
if (not_joined_map)
|
||||
data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map));
|
||||
|
||||
if (!check_limits)
|
||||
return true;
|
||||
|
||||
@ -693,6 +732,7 @@ public:
|
||||
const HashJoin & join,
|
||||
const ColumnRawPtrs & key_columns_,
|
||||
const Sizes & key_sizes_,
|
||||
const UInt8ColumnDataPtr & join_mask_column_,
|
||||
bool is_asof_join,
|
||||
bool is_join_get_)
|
||||
: key_columns(key_columns_)
|
||||
@ -700,6 +740,7 @@ public:
|
||||
, rows_to_add(block.rows())
|
||||
, asof_type(join.getAsofType())
|
||||
, asof_inequality(join.getAsofInequality())
|
||||
, join_mask_column(join_mask_column_)
|
||||
, is_join_get(is_join_get_)
|
||||
{
|
||||
size_t num_columns_to_add = block_with_columns_to_add.columns();
|
||||
@ -784,6 +825,8 @@ public:
|
||||
ASOF::Inequality asofInequality() const { return asof_inequality; }
|
||||
const IColumn & leftAsofKey() const { return *left_asof_key; }
|
||||
|
||||
bool isRowFiltered(size_t i) { return join_mask_column && !(*join_mask_column)[i]; }
|
||||
|
||||
const ColumnRawPtrs & key_columns;
|
||||
const Sizes & key_sizes;
|
||||
size_t rows_to_add;
|
||||
@ -799,6 +842,7 @@ private:
|
||||
std::optional<TypeIndex> asof_type;
|
||||
ASOF::Inequality asof_inequality;
|
||||
const IColumn * left_asof_key = nullptr;
|
||||
UInt8ColumnDataPtr join_mask_column;
|
||||
bool is_join_get;
|
||||
|
||||
void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name)
|
||||
@ -891,7 +935,9 @@ NO_INLINE IColumn::Filter joinRightColumns(
|
||||
}
|
||||
}
|
||||
|
||||
auto find_result = key_getter.findKey(map, i, pool);
|
||||
bool row_acceptable = !added_columns.isRowFiltered(i);
|
||||
using FindResult = typename KeyGetter::FindResult;
|
||||
auto find_result = row_acceptable ? key_getter.findKey(map, i, pool) : FindResult();
|
||||
|
||||
if (find_result.isFound())
|
||||
{
|
||||
@ -1098,7 +1144,20 @@ void HashJoin::joinBlockImpl(
|
||||
* For ASOF, the last column is used as the ASOF column
|
||||
*/
|
||||
|
||||
AddedColumns added_columns(block_with_columns_to_add, block, savedBlockSample(), *this, left_key_columns, key_sizes, is_asof_join, is_join_get);
|
||||
/// Only rows where mask == true can be joined
|
||||
ColumnPtr join_mask_column = JoinCommon::getColumnAsMask(block, condition_mask_column_name_left);
|
||||
|
||||
AddedColumns added_columns(
|
||||
block_with_columns_to_add,
|
||||
block,
|
||||
savedBlockSample(),
|
||||
*this,
|
||||
left_key_columns,
|
||||
key_sizes,
|
||||
join_mask_column ? &assert_cast<const ColumnUInt8 &>(*join_mask_column).getData() : nullptr,
|
||||
is_asof_join,
|
||||
is_join_get);
|
||||
|
||||
bool has_required_right_keys = (required_right_keys.columns() != 0);
|
||||
added_columns.need_filter = need_filter || has_required_right_keys;
|
||||
|
||||
@ -1324,7 +1383,8 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block
|
||||
void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
{
|
||||
const Names & key_names_left = table_join->keyNamesLeft();
|
||||
JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right);
|
||||
JoinCommon::checkTypesOfKeys(block, key_names_left, condition_mask_column_name_left,
|
||||
right_sample_block, key_names_right, condition_mask_column_name_right);
|
||||
|
||||
if (overDictionary())
|
||||
{
|
||||
|
@ -377,6 +377,10 @@ private:
|
||||
/// Left table column names that are sources for required_right_keys columns
|
||||
std::vector<String> required_right_keys_sources;
|
||||
|
||||
/// Additional conditions for rows to join from JOIN ON section
|
||||
String condition_mask_column_name_left;
|
||||
String condition_mask_column_name_right;
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
Block totals;
|
||||
|
@ -4,8 +4,7 @@
|
||||
#include <DataStreams/AddingDefaultBlockOutputStream.h>
|
||||
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
|
||||
#include <DataStreams/CountingBlockOutputStream.h>
|
||||
#include <DataStreams/InputStreamFromASTInsertQuery.h>
|
||||
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
|
||||
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
|
||||
#include <DataStreams/PushingToViewsBlockOutputStream.h>
|
||||
#include <DataStreams/SquashingBlockOutputStream.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
@ -351,9 +350,13 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
}
|
||||
else if (query.data && !query.has_tail) /// can execute without additional data
|
||||
{
|
||||
// res.out = std::move(out_streams.at(0));
|
||||
res.in = std::make_shared<InputStreamFromASTInsertQuery>(query_ptr, nullptr, query_sample_block, getContext(), nullptr);
|
||||
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out_streams.at(0));
|
||||
auto pipe = getSourceFromFromASTInsertQuery(query_ptr, nullptr, query_sample_block, getContext(), nullptr);
|
||||
res.pipeline.init(std::move(pipe));
|
||||
res.pipeline.resize(1);
|
||||
res.pipeline.setSinks([&](const Block &, Pipe::StreamType)
|
||||
{
|
||||
return std::make_shared<SinkToOutputStream>(out_streams.at(0));
|
||||
});
|
||||
}
|
||||
else
|
||||
res.out = std::move(out_streams.at(0));
|
||||
|
@ -663,7 +663,7 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
|
||||
{
|
||||
LOG_ERROR(log, "SYNC REPLICA {}: Timed out!", table_id.getNameForLogs());
|
||||
throw Exception(
|
||||
"SYNC REPLICA " + table_id.getNameForLogs() + ": command timed out! "
|
||||
"SYNC REPLICA " + table_id.getNameForLogs() + ": command timed out. "
|
||||
"See the 'receive_timeout' setting", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
LOG_TRACE(log, "SYNC REPLICA {}: OK", table_id.getNameForLogs());
|
||||
|
@ -1,19 +1,21 @@
|
||||
#include <limits>
|
||||
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/SortCursor.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataStreams/BlocksListBlockInputStream.h>
|
||||
#include <DataStreams/TemporaryFileStream.h>
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Interpreters/MergeJoin.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
#include <Interpreters/sortBlock.h>
|
||||
#include <Interpreters/join_common.h>
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
#include <DataStreams/TemporaryFileStream.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Transforms/MergeSortingTransform.h>
|
||||
#include <Interpreters/sortBlock.h>
|
||||
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
|
||||
#include <DataStreams/BlocksListBlockInputStream.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <Processors/Transforms/MergeSortingTransform.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -23,12 +25,50 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
String deriveTempName(const String & name)
|
||||
{
|
||||
return "--" + name;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert column with conditions for left or right table to join to joining key.
|
||||
* Input column type is UInt8 output is Nullable(UInt8).
|
||||
* 0 converted to NULL and such rows won't be joined,
|
||||
* 1 converted to 0 (any constant non-NULL value to join)
|
||||
*/
|
||||
ColumnWithTypeAndName condtitionColumnToJoinable(const Block & block, const String & src_column_name)
|
||||
{
|
||||
size_t res_size = block.rows();
|
||||
auto data_col = ColumnUInt8::create(res_size, 0);
|
||||
auto null_map = ColumnUInt8::create(res_size, 0);
|
||||
|
||||
if (!src_column_name.empty())
|
||||
{
|
||||
auto mask_col = JoinCommon::getColumnAsMask(block, src_column_name);
|
||||
assert(mask_col);
|
||||
const auto & mask_data = assert_cast<const ColumnUInt8 &>(*mask_col).getData();
|
||||
|
||||
for (size_t i = 0; i < res_size; ++i)
|
||||
null_map->getData()[i] = !mask_data[i];
|
||||
}
|
||||
|
||||
ColumnPtr res_col = ColumnNullable::create(std::move(data_col), std::move(null_map));
|
||||
DataTypePtr res_col_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
|
||||
String res_name = deriveTempName(src_column_name);
|
||||
|
||||
if (block.has(res_name))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Conflicting column name '{}'", res_name);
|
||||
|
||||
return {res_col, res_col_type, res_name};
|
||||
}
|
||||
|
||||
template <bool has_left_nulls, bool has_right_nulls>
|
||||
int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, size_t lhs_pos, size_t rhs_pos)
|
||||
{
|
||||
@ -180,7 +220,7 @@ class MergeJoinCursor
|
||||
{
|
||||
public:
|
||||
MergeJoinCursor(const Block & block, const SortDescription & desc_)
|
||||
: impl(SortCursorImpl(block, desc_))
|
||||
: impl(block, desc_)
|
||||
{
|
||||
/// SortCursorImpl can work with permutation, but MergeJoinCursor can't.
|
||||
if (impl.permutation)
|
||||
@ -320,14 +360,17 @@ MutableColumns makeMutableColumns(const Block & block, size_t rows_to_reserve =
|
||||
void makeSortAndMerge(const Names & keys, SortDescription & sort, SortDescription & merge)
|
||||
{
|
||||
NameSet unique_keys;
|
||||
for (const auto & sd: merge)
|
||||
unique_keys.insert(sd.column_name);
|
||||
|
||||
for (const auto & key_name : keys)
|
||||
{
|
||||
merge.emplace_back(SortColumnDescription(key_name, 1, 1));
|
||||
merge.emplace_back(key_name);
|
||||
|
||||
if (!unique_keys.count(key_name))
|
||||
if (!unique_keys.contains(key_name))
|
||||
{
|
||||
unique_keys.insert(key_name);
|
||||
sort.emplace_back(SortColumnDescription(key_name, 1, 1));
|
||||
sort.emplace_back(key_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -464,15 +507,31 @@ MergeJoin::MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right
|
||||
ErrorCodes::PARAMETER_OUT_OF_BOUND);
|
||||
}
|
||||
|
||||
for (const auto & right_key : table_join->keyNamesRight())
|
||||
std::tie(mask_column_name_left, mask_column_name_right) = table_join->joinConditionColumnNames();
|
||||
|
||||
/// Add auxiliary joining keys to join only rows where conditions from JOIN ON sections holds
|
||||
/// Input boolean column converted to nullable and only rows with non NULLS value will be joined
|
||||
if (!mask_column_name_left.empty() || !mask_column_name_right.empty())
|
||||
{
|
||||
JoinCommon::checkTypesOfMasks({}, "", right_sample_block, mask_column_name_right);
|
||||
|
||||
key_names_left.push_back(deriveTempName(mask_column_name_left));
|
||||
key_names_right.push_back(deriveTempName(mask_column_name_right));
|
||||
}
|
||||
|
||||
key_names_left.insert(key_names_left.end(), table_join->keyNamesLeft().begin(), table_join->keyNamesLeft().end());
|
||||
key_names_right.insert(key_names_right.end(), table_join->keyNamesRight().begin(), table_join->keyNamesRight().end());
|
||||
|
||||
addConditionJoinColumn(right_sample_block, JoinTableSide::Right);
|
||||
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, right_columns_to_add);
|
||||
|
||||
for (const auto & right_key : key_names_right)
|
||||
{
|
||||
if (right_sample_block.getByName(right_key).type->lowCardinality())
|
||||
lowcard_right_keys.push_back(right_key);
|
||||
}
|
||||
|
||||
table_join->splitAdditionalColumns(right_sample_block, right_table_keys, right_columns_to_add);
|
||||
JoinCommon::removeLowCardinalityInplace(right_table_keys);
|
||||
JoinCommon::removeLowCardinalityInplace(right_sample_block, table_join->keyNamesRight());
|
||||
JoinCommon::removeLowCardinalityInplace(right_sample_block, key_names_right);
|
||||
|
||||
const NameSet required_right_keys = table_join->requiredRightKeys();
|
||||
for (const auto & column : right_table_keys)
|
||||
@ -484,8 +543,8 @@ MergeJoin::MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right
|
||||
if (nullable_right_side)
|
||||
JoinCommon::convertColumnsToNullable(right_columns_to_add);
|
||||
|
||||
makeSortAndMerge(table_join->keyNamesLeft(), left_sort_description, left_merge_description);
|
||||
makeSortAndMerge(table_join->keyNamesRight(), right_sort_description, right_merge_description);
|
||||
makeSortAndMerge(key_names_left, left_sort_description, left_merge_description);
|
||||
makeSortAndMerge(key_names_right, right_sort_description, right_merge_description);
|
||||
|
||||
/// Temporary disable 'partial_merge_join_left_table_buffer_bytes' without 'partial_merge_join_optimizations'
|
||||
if (table_join->enablePartialMergeJoinOptimizations())
|
||||
@ -526,7 +585,8 @@ void MergeJoin::mergeInMemoryRightBlocks()
|
||||
pipeline.init(std::move(source));
|
||||
|
||||
/// TODO: there should be no split keys by blocks for RIGHT|FULL JOIN
|
||||
pipeline.addTransform(std::make_shared<MergeSortingTransform>(pipeline.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0));
|
||||
pipeline.addTransform(std::make_shared<MergeSortingTransform>(
|
||||
pipeline.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0));
|
||||
|
||||
auto sorted_input = PipelineExecutingBlockInputStream(std::move(pipeline));
|
||||
|
||||
@ -602,6 +662,7 @@ bool MergeJoin::addJoinedBlock(const Block & src_block, bool)
|
||||
{
|
||||
Block block = modifyRightBlock(src_block);
|
||||
|
||||
addConditionJoinColumn(block, JoinTableSide::Right);
|
||||
sortBlock(block, right_sort_description);
|
||||
return saveRightBlock(std::move(block));
|
||||
}
|
||||
@ -611,16 +672,22 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
Names lowcard_keys = lowcard_right_keys;
|
||||
if (block)
|
||||
{
|
||||
JoinCommon::checkTypesOfKeys(block, table_join->keyNamesLeft(), right_table_keys, table_join->keyNamesRight());
|
||||
JoinCommon::checkTypesOfMasks(block, mask_column_name_left, right_sample_block, mask_column_name_right);
|
||||
|
||||
/// Add auxiliary column, will be removed after joining
|
||||
addConditionJoinColumn(block, JoinTableSide::Left);
|
||||
|
||||
JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right);
|
||||
|
||||
materializeBlockInplace(block);
|
||||
|
||||
for (const auto & column_name : table_join->keyNamesLeft())
|
||||
for (const auto & column_name : key_names_left)
|
||||
{
|
||||
if (block.getByName(column_name).type->lowCardinality())
|
||||
lowcard_keys.push_back(column_name);
|
||||
}
|
||||
|
||||
JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesLeft(), false);
|
||||
JoinCommon::removeLowCardinalityInplace(block, key_names_left, false);
|
||||
|
||||
sortBlock(block, left_sort_description);
|
||||
|
||||
@ -655,6 +722,9 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
if (!not_processed && left_blocks_buffer)
|
||||
not_processed = std::make_shared<NotProcessed>(NotProcessed{{}, 0, 0, 0});
|
||||
|
||||
if (needConditionJoinColumn())
|
||||
block.erase(deriveTempName(mask_column_name_left));
|
||||
|
||||
for (const auto & column_name : lowcard_keys)
|
||||
{
|
||||
if (!block.has(column_name))
|
||||
@ -697,7 +767,7 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
|
||||
if (skip_not_intersected)
|
||||
{
|
||||
int intersection = left_cursor.intersect(min_max_right_blocks[i], table_join->keyNamesRight());
|
||||
int intersection = left_cursor.intersect(min_max_right_blocks[i], key_names_right);
|
||||
if (intersection < 0)
|
||||
break; /// (left) ... (right)
|
||||
if (intersection > 0)
|
||||
@ -730,7 +800,7 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
|
||||
if (skip_not_intersected)
|
||||
{
|
||||
int intersection = left_cursor.intersect(min_max_right_blocks[i], table_join->keyNamesRight());
|
||||
int intersection = left_cursor.intersect(min_max_right_blocks[i], key_names_right);
|
||||
if (intersection < 0)
|
||||
break; /// (left) ... (right)
|
||||
if (intersection > 0)
|
||||
@ -831,7 +901,7 @@ bool MergeJoin::leftJoin(MergeJoinCursor & left_cursor, const Block & left_block
|
||||
}
|
||||
|
||||
bool MergeJoin::allInnerJoin(MergeJoinCursor & left_cursor, const Block & left_block, RightBlockInfo & right_block_info,
|
||||
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail)
|
||||
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail)
|
||||
{
|
||||
const Block & right_block = *right_block_info.block;
|
||||
MergeJoinCursor right_cursor(right_block, right_merge_description);
|
||||
@ -970,11 +1040,15 @@ void MergeJoin::initRightTableWriter()
|
||||
class NonMergeJoinedBlockInputStream : private NotJoined, public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
NonMergeJoinedBlockInputStream(const MergeJoin & parent_, const Block & result_sample_block_, UInt64 max_block_size_)
|
||||
NonMergeJoinedBlockInputStream(const MergeJoin & parent_,
|
||||
const Block & result_sample_block_,
|
||||
const Names & key_names_right_,
|
||||
UInt64 max_block_size_)
|
||||
: NotJoined(*parent_.table_join,
|
||||
parent_.modifyRightBlock(parent_.right_sample_block),
|
||||
parent_.right_sample_block,
|
||||
result_sample_block_)
|
||||
result_sample_block_,
|
||||
{}, key_names_right_)
|
||||
, parent(parent_)
|
||||
, max_block_size(max_block_size_)
|
||||
{}
|
||||
@ -1062,10 +1136,26 @@ private:
|
||||
BlockInputStreamPtr MergeJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
|
||||
{
|
||||
if (table_join->strictness() == ASTTableJoin::Strictness::All && (is_right || is_full))
|
||||
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, max_block_size);
|
||||
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, key_names_right, max_block_size);
|
||||
return {};
|
||||
}
|
||||
|
||||
bool MergeJoin::needConditionJoinColumn() const
|
||||
{
|
||||
return !mask_column_name_left.empty() || !mask_column_name_right.empty();
|
||||
}
|
||||
|
||||
void MergeJoin::addConditionJoinColumn(Block & block, JoinTableSide block_side) const
|
||||
{
|
||||
if (needConditionJoinColumn())
|
||||
{
|
||||
if (block_side == JoinTableSide::Left)
|
||||
block.insert(condtitionColumnToJoinable(block, mask_column_name_left));
|
||||
else
|
||||
block.insert(condtitionColumnToJoinable(block, mask_column_name_right));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MergeJoin::RightBlockInfo::RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number_, size_t & skip_, RowBitmaps * bitmaps_)
|
||||
: block(block_)
|
||||
|
@ -16,7 +16,7 @@ class TableJoin;
|
||||
class MergeJoinCursor;
|
||||
struct MergeJoinEqualRange;
|
||||
class RowBitmaps;
|
||||
|
||||
enum class JoinTableSide;
|
||||
|
||||
class MergeJoin : public IJoin
|
||||
{
|
||||
@ -79,6 +79,14 @@ private:
|
||||
Block right_columns_to_add;
|
||||
SortedBlocksWriter::Blocks right_blocks;
|
||||
|
||||
Names key_names_right;
|
||||
Names key_names_left;
|
||||
|
||||
/// Additional conditions for rows to join from JOIN ON section.
|
||||
/// Only rows where conditions are met can be joined.
|
||||
String mask_column_name_left;
|
||||
String mask_column_name_right;
|
||||
|
||||
/// Each block stores first and last row from corresponding sorted block on disk
|
||||
Blocks min_max_right_blocks;
|
||||
std::shared_ptr<SortedBlocksBuffer> left_blocks_buffer;
|
||||
@ -151,6 +159,9 @@ private:
|
||||
void mergeFlushedRightBlocks();
|
||||
|
||||
void initRightTableWriter();
|
||||
|
||||
bool needConditionJoinColumn() const;
|
||||
void addConditionJoinColumn(Block & block, JoinTableSide block_side) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,17 +1,17 @@
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -132,6 +132,8 @@ ASTPtr TableJoin::leftKeysList() const
|
||||
{
|
||||
ASTPtr keys_list = std::make_shared<ASTExpressionList>();
|
||||
keys_list->children = key_asts_left;
|
||||
if (ASTPtr extra_cond = joinConditionColumn(JoinTableSide::Left))
|
||||
keys_list->children.push_back(extra_cond);
|
||||
return keys_list;
|
||||
}
|
||||
|
||||
@ -140,6 +142,8 @@ ASTPtr TableJoin::rightKeysList() const
|
||||
ASTPtr keys_list = std::make_shared<ASTExpressionList>();
|
||||
if (hasOn())
|
||||
keys_list->children = key_asts_right;
|
||||
if (ASTPtr extra_cond = joinConditionColumn(JoinTableSide::Right))
|
||||
keys_list->children.push_back(extra_cond);
|
||||
return keys_list;
|
||||
}
|
||||
|
||||
@ -176,22 +180,6 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names
|
||||
return getNamesWithAliases(required_columns);
|
||||
}
|
||||
|
||||
void TableJoin::splitAdditionalColumns(const Block & sample_block, Block & block_keys, Block & block_others) const
|
||||
{
|
||||
block_others = materializeBlock(sample_block);
|
||||
|
||||
for (const String & column_name : key_names_right)
|
||||
{
|
||||
/// Extract right keys with correct keys order. There could be the same key names.
|
||||
if (!block_keys.has(column_name))
|
||||
{
|
||||
auto & col = block_others.getByName(column_name);
|
||||
block_keys.insert(col);
|
||||
block_others.erase(column_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Block TableJoin::getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const
|
||||
{
|
||||
const Names & left_keys = keyNamesLeft();
|
||||
@ -474,4 +462,48 @@ String TableJoin::renamedRightColumnName(const String & name) const
|
||||
return name;
|
||||
}
|
||||
|
||||
void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("TableJoin"), "Add join condition for {} table: {}", (is_left ? "left" : "right"), queryToString(ast));
|
||||
|
||||
if (is_left)
|
||||
on_filter_condition_asts_left.push_back(ast);
|
||||
else
|
||||
on_filter_condition_asts_right.push_back(ast);
|
||||
}
|
||||
|
||||
/// Returns all conditions related to one table joined with 'and' function
|
||||
static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts)
|
||||
{
|
||||
if (on_filter_condition_asts.empty())
|
||||
return nullptr;
|
||||
|
||||
if (on_filter_condition_asts.size() == 1)
|
||||
return on_filter_condition_asts[0];
|
||||
|
||||
auto function = std::make_shared<ASTFunction>();
|
||||
function->name = "and";
|
||||
function->arguments = std::make_shared<ASTExpressionList>();
|
||||
function->children.push_back(function->arguments);
|
||||
function->arguments->children = on_filter_condition_asts;
|
||||
return function;
|
||||
}
|
||||
|
||||
ASTPtr TableJoin::joinConditionColumn(JoinTableSide side) const
|
||||
{
|
||||
if (side == JoinTableSide::Left)
|
||||
return buildJoinConditionColumn(on_filter_condition_asts_left);
|
||||
return buildJoinConditionColumn(on_filter_condition_asts_right);
|
||||
}
|
||||
|
||||
std::pair<String, String> TableJoin::joinConditionColumnNames() const
|
||||
{
|
||||
std::pair<String, String> res;
|
||||
if (auto cond_ast = joinConditionColumn(JoinTableSide::Left))
|
||||
res.first = cond_ast->getColumnName();
|
||||
if (auto cond_ast = joinConditionColumn(JoinTableSide::Right))
|
||||
res.second = cond_ast->getColumnName();
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -33,6 +33,12 @@ struct Settings;
|
||||
class IVolume;
|
||||
using VolumePtr = std::shared_ptr<IVolume>;
|
||||
|
||||
enum class JoinTableSide
|
||||
{
|
||||
Left,
|
||||
Right
|
||||
};
|
||||
|
||||
class TableJoin
|
||||
{
|
||||
|
||||
@ -67,9 +73,12 @@ private:
|
||||
|
||||
Names key_names_left;
|
||||
Names key_names_right; /// Duplicating names are qualified.
|
||||
ASTs on_filter_condition_asts_left;
|
||||
ASTs on_filter_condition_asts_right;
|
||||
|
||||
ASTs key_asts_left;
|
||||
ASTs key_asts_right;
|
||||
|
||||
ASTTableJoin table_join;
|
||||
|
||||
ASOF::Inequality asof_inequality = ASOF::Inequality::GreaterOrEquals;
|
||||
@ -150,6 +159,23 @@ public:
|
||||
void addUsingKey(const ASTPtr & ast);
|
||||
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);
|
||||
|
||||
/* Conditions for left/right table from JOIN ON section.
|
||||
*
|
||||
* Conditions for left and right tables stored separately and united with 'and' function into one column.
|
||||
* For example for query:
|
||||
* SELECT ... JOIN ... ON t1.id == t2.id AND expr11(t1) AND expr21(t2) AND expr12(t1) AND expr22(t2)
|
||||
*
|
||||
* We will build two new ASTs: `expr11(t1) AND expr12(t1)`, `expr21(t2) AND expr22(t2)`
|
||||
* Such columns will be added and calculated for left and right tables respectively.
|
||||
* Only rows where conditions are met (where new columns have non-zero value) will be joined.
|
||||
*
|
||||
* NOTE: non-equi condition containing columns from different tables (like `... ON t1.id = t2.id AND t1.val > t2.val)
|
||||
* doesn't supported yet, it can be added later.
|
||||
*/
|
||||
void addJoinCondition(const ASTPtr & ast, bool is_left);
|
||||
ASTPtr joinConditionColumn(JoinTableSide side) const;
|
||||
std::pair<String, String> joinConditionColumnNames() const;
|
||||
|
||||
bool hasUsing() const { return table_join.using_expression_list != nullptr; }
|
||||
bool hasOn() const { return table_join.on_expression != nullptr; }
|
||||
|
||||
@ -201,8 +227,6 @@ public:
|
||||
/// StorageJoin overrides key names (cause of different names qualification)
|
||||
void setRightKeys(const Names & keys) { key_names_right = keys; }
|
||||
|
||||
/// Split key and other columns by keys name list
|
||||
void splitAdditionalColumns(const Block & sample_block, Block & block_keys, Block & block_others) const;
|
||||
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
|
||||
|
||||
String renamedRightColumnName(const String & name) const;
|
||||
|
@ -532,9 +532,12 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_
|
||||
|
||||
CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof};
|
||||
CollectJoinOnKeysVisitor(data).visit(table_join.on_expression);
|
||||
if (!data.has_some)
|
||||
if (analyzed_join.keyNamesLeft().empty())
|
||||
{
|
||||
throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression),
|
||||
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
||||
}
|
||||
|
||||
if (is_asof)
|
||||
data.asofToJoinKeys();
|
||||
}
|
||||
|
@ -14,11 +14,10 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataStreams/NullBlockOutputStream.h>
|
||||
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <Processors/NullSink.h>
|
||||
#include <filesystem>
|
||||
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
@ -168,48 +167,72 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context,
|
||||
return getDistributedDDLStatus(node_path, entry, context);
|
||||
}
|
||||
|
||||
|
||||
class DDLQueryStatusSource final : public SourceWithProgress
|
||||
{
|
||||
public:
|
||||
DDLQueryStatusSource(
|
||||
const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait = {});
|
||||
|
||||
String getName() const override { return "DDLQueryStatus"; }
|
||||
Chunk generate() override;
|
||||
Status prepare() override;
|
||||
|
||||
private:
|
||||
static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
|
||||
|
||||
Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
|
||||
|
||||
std::pair<String, UInt16> parseHostAndPort(const String & host_id) const;
|
||||
|
||||
String node_path;
|
||||
ContextPtr context;
|
||||
Stopwatch watch;
|
||||
Poco::Logger * log;
|
||||
|
||||
NameSet waiting_hosts; /// hosts from task host list
|
||||
NameSet finished_hosts; /// finished hosts from host list
|
||||
NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
|
||||
Strings current_active_hosts; /// Hosts that were in active state at the last check
|
||||
size_t num_hosts_finished = 0;
|
||||
|
||||
/// Save the first detected error and throw it at the end of execution
|
||||
std::unique_ptr<Exception> first_exception;
|
||||
|
||||
Int64 timeout_seconds = 120;
|
||||
bool by_hostname = true;
|
||||
bool throw_on_timeout = true;
|
||||
bool timeout_exceeded = false;
|
||||
};
|
||||
|
||||
|
||||
BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait)
|
||||
{
|
||||
BlockIO io;
|
||||
if (context->getSettingsRef().distributed_ddl_task_timeout == 0)
|
||||
return io;
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context, hosts_to_wait);
|
||||
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
|
||||
{
|
||||
/// Wait for query to finish, but ignore output
|
||||
auto null_output = std::make_shared<NullBlockOutputStream>(stream->getHeader());
|
||||
stream = std::make_shared<NullAndDoCopyBlockInputStream>(std::move(stream), std::move(null_output));
|
||||
}
|
||||
ProcessorPtr processor = std::make_shared<DDLQueryStatusSource>(node_path, entry, context, hosts_to_wait);
|
||||
io.pipeline.init(Pipe{processor});
|
||||
|
||||
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
|
||||
io.pipeline.setSinks([](const Block & header, QueryPipeline::StreamType){ return std::make_shared<EmptySink>(header); });
|
||||
|
||||
io.in = std::move(stream);
|
||||
return io;
|
||||
}
|
||||
|
||||
DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_,
|
||||
const std::optional<Strings> & hosts_to_wait)
|
||||
: node_path(zk_node_path)
|
||||
, context(context_)
|
||||
, watch(CLOCK_MONOTONIC_COARSE)
|
||||
, log(&Poco::Logger::get("DDLQueryStatusInputStream"))
|
||||
static Block getSampleBlock(ContextPtr context_, bool hosts_to_wait)
|
||||
{
|
||||
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::THROW ||
|
||||
context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
|
||||
throw_on_timeout = true;
|
||||
else if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT ||
|
||||
context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NEVER_THROW)
|
||||
throw_on_timeout = false;
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown output mode");
|
||||
auto output_mode = context_->getSettingsRef().distributed_ddl_output_mode;
|
||||
|
||||
auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr
|
||||
{
|
||||
if (throw_on_timeout)
|
||||
if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE)
|
||||
return type;
|
||||
return std::make_shared<DataTypeNullable>(type);
|
||||
};
|
||||
|
||||
sample = Block{
|
||||
Block res = Block{
|
||||
{std::make_shared<DataTypeString>(), "host"},
|
||||
{std::make_shared<DataTypeUInt16>(), "port"},
|
||||
{maybe_make_nullable(std::make_shared<DataTypeInt64>()), "status"},
|
||||
@ -218,11 +241,27 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
|
||||
{std::make_shared<DataTypeUInt64>(), "num_hosts_active"},
|
||||
};
|
||||
|
||||
if (hosts_to_wait)
|
||||
res.erase("port");
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
DDLQueryStatusSource::DDLQueryStatusSource(
|
||||
const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait)
|
||||
: SourceWithProgress(getSampleBlock(context_, hosts_to_wait.has_value()), true)
|
||||
, node_path(zk_node_path)
|
||||
, context(context_)
|
||||
, watch(CLOCK_MONOTONIC_COARSE)
|
||||
, log(&Poco::Logger::get("DDLQueryStatusInputStream"))
|
||||
{
|
||||
auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
|
||||
throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
|
||||
|
||||
if (hosts_to_wait)
|
||||
{
|
||||
waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
|
||||
by_hostname = false;
|
||||
sample.erase("port");
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -231,11 +270,10 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
|
||||
}
|
||||
|
||||
addTotalRowsApprox(waiting_hosts.size());
|
||||
|
||||
timeout_seconds = context->getSettingsRef().distributed_ddl_task_timeout;
|
||||
}
|
||||
|
||||
std::pair<String, UInt16> DDLQueryStatusInputStream::parseHostAndPort(const String & host_id) const
|
||||
std::pair<String, UInt16> DDLQueryStatusSource::parseHostAndPort(const String & host_id) const
|
||||
{
|
||||
String host = host_id;
|
||||
UInt16 port = 0;
|
||||
@ -248,37 +286,28 @@ std::pair<String, UInt16> DDLQueryStatusInputStream::parseHostAndPort(const Stri
|
||||
return {host, port};
|
||||
}
|
||||
|
||||
Block DDLQueryStatusInputStream::readImpl()
|
||||
Chunk DDLQueryStatusSource::generate()
|
||||
{
|
||||
Block res;
|
||||
bool all_hosts_finished = num_hosts_finished >= waiting_hosts.size();
|
||||
|
||||
/// Seems like num_hosts_finished cannot be strictly greater than waiting_hosts.size()
|
||||
assert(num_hosts_finished <= waiting_hosts.size());
|
||||
if (all_hosts_finished || timeout_exceeded)
|
||||
{
|
||||
bool throw_if_error_on_host = context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW;
|
||||
if (first_exception && throw_if_error_on_host)
|
||||
throw Exception(*first_exception);
|
||||
|
||||
return res;
|
||||
}
|
||||
if (all_hosts_finished || timeout_exceeded)
|
||||
return {};
|
||||
|
||||
auto zookeeper = context->getZooKeeper();
|
||||
size_t try_number = 0;
|
||||
|
||||
while (res.rows() == 0)
|
||||
while (true)
|
||||
{
|
||||
if (isCancelled())
|
||||
{
|
||||
bool throw_if_error_on_host = context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW;
|
||||
if (first_exception && throw_if_error_on_host)
|
||||
throw Exception(*first_exception);
|
||||
|
||||
return res;
|
||||
}
|
||||
return {};
|
||||
|
||||
if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
|
||||
{
|
||||
timeout_exceeded = true;
|
||||
|
||||
size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
|
||||
size_t num_active_hosts = current_active_hosts.size();
|
||||
|
||||
@ -286,10 +315,13 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
"There are {} unfinished hosts ({} of them are currently active), "
|
||||
"they are going to execute the query in background";
|
||||
if (throw_on_timeout)
|
||||
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
|
||||
node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
||||
{
|
||||
if (!first_exception)
|
||||
first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
|
||||
node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
||||
return {};
|
||||
}
|
||||
|
||||
timeout_exceeded = true;
|
||||
LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
||||
|
||||
NameSet unfinished_hosts = waiting_hosts;
|
||||
@ -297,7 +329,7 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
unfinished_hosts.erase(host_id);
|
||||
|
||||
/// Query is not finished on the rest hosts, so fill the corresponding rows with NULLs.
|
||||
MutableColumns columns = sample.cloneEmptyColumns();
|
||||
MutableColumns columns = output.getHeader().cloneEmptyColumns();
|
||||
for (const String & host_id : unfinished_hosts)
|
||||
{
|
||||
auto [host, port] = parseHostAndPort(host_id);
|
||||
@ -310,8 +342,7 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
columns[num++]->insert(num_unfinished_hosts);
|
||||
columns[num++]->insert(num_active_hosts);
|
||||
}
|
||||
res = sample.cloneWithColumns(std::move(columns));
|
||||
return res;
|
||||
return Chunk(std::move(columns), unfinished_hosts.size());
|
||||
}
|
||||
|
||||
if (num_hosts_finished != 0 || try_number != 0)
|
||||
@ -321,9 +352,13 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
|
||||
if (!zookeeper->exists(node_path))
|
||||
{
|
||||
throw Exception(ErrorCodes::UNFINISHED,
|
||||
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner since it was finished (or its lifetime is expired)",
|
||||
node_path);
|
||||
/// Paradoxically, this exception will be throw even in case of "never_throw" mode.
|
||||
|
||||
if (!first_exception)
|
||||
first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
|
||||
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
|
||||
" since it was finished (or its lifetime is expired)", node_path);
|
||||
return {};
|
||||
}
|
||||
|
||||
Strings new_hosts = getNewAndUpdate(getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "finished"));
|
||||
@ -333,7 +368,7 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
|
||||
current_active_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "active");
|
||||
|
||||
MutableColumns columns = sample.cloneEmptyColumns();
|
||||
MutableColumns columns = output.getHeader().cloneEmptyColumns();
|
||||
for (const String & host_id : new_hosts)
|
||||
{
|
||||
ExecutionStatus status(-1, "Cannot obtain error message");
|
||||
@ -345,8 +380,11 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
|
||||
auto [host, port] = parseHostAndPort(host_id);
|
||||
|
||||
if (status.code != 0 && first_exception == nullptr)
|
||||
if (status.code != 0 && !first_exception
|
||||
&& context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
|
||||
{
|
||||
first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
|
||||
}
|
||||
|
||||
++num_hosts_finished;
|
||||
|
||||
@ -359,13 +397,34 @@ Block DDLQueryStatusInputStream::readImpl()
|
||||
columns[num++]->insert(waiting_hosts.size() - num_hosts_finished);
|
||||
columns[num++]->insert(current_active_hosts.size());
|
||||
}
|
||||
res = sample.cloneWithColumns(std::move(columns));
|
||||
}
|
||||
|
||||
return res;
|
||||
return Chunk(std::move(columns), new_hosts.size());
|
||||
}
|
||||
}
|
||||
|
||||
Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
|
||||
IProcessor::Status DDLQueryStatusSource::prepare()
|
||||
{
|
||||
/// This method is overloaded to throw exception after all data is read.
|
||||
/// Exception is pushed into pipe (instead of simply being thrown) to ensure the order of data processing and exception.
|
||||
|
||||
if (finished)
|
||||
{
|
||||
if (first_exception)
|
||||
{
|
||||
if (!output.canPush())
|
||||
return Status::PortFull;
|
||||
|
||||
output.pushException(std::make_exception_ptr(*first_exception));
|
||||
}
|
||||
|
||||
output.finish();
|
||||
return Status::Finished;
|
||||
}
|
||||
else
|
||||
return SourceWithProgress::prepare();
|
||||
}
|
||||
|
||||
Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
|
||||
{
|
||||
Strings res;
|
||||
Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
|
||||
@ -374,7 +433,7 @@ Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<
|
||||
return res;
|
||||
}
|
||||
|
||||
Strings DDLQueryStatusInputStream::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
|
||||
Strings DDLQueryStatusSource::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
|
||||
{
|
||||
Strings diff;
|
||||
for (const String & host : current_list_of_finished_hosts)
|
||||
@ -384,7 +443,7 @@ Strings DDLQueryStatusInputStream::getNewAndUpdate(const Strings & current_list_
|
||||
if (!ignoring_hosts.count(host))
|
||||
{
|
||||
ignoring_hosts.emplace(host);
|
||||
LOG_INFO(log, "Unexpected host {} appeared in task {}", host, node_path);
|
||||
LOG_INFO(log, "Unexpected host {} appeared in task {}", host, node_path);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
@ -22,54 +23,12 @@ struct DDLLogEntry;
|
||||
bool isSupportedAlterType(int type);
|
||||
|
||||
/// Pushes distributed DDL query to the queue.
|
||||
/// Returns DDLQueryStatusInputStream, which reads results of query execution on each host in the cluster.
|
||||
/// Returns DDLQueryStatusSource, which reads results of query execution on each host in the cluster.
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context);
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context, const AccessRightsElements & query_requires_access);
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context, AccessRightsElements && query_requires_access);
|
||||
|
||||
BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait = {});
|
||||
|
||||
class DDLQueryStatusInputStream final : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait = {});
|
||||
|
||||
String getName() const override { return "DDLQueryStatusInputStream"; }
|
||||
|
||||
Block getHeader() const override { return sample; }
|
||||
|
||||
Block getSampleBlock() const { return sample.cloneEmpty(); }
|
||||
|
||||
Block readImpl() override;
|
||||
|
||||
private:
|
||||
|
||||
static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
|
||||
|
||||
Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
|
||||
|
||||
std::pair<String, UInt16> parseHostAndPort(const String & host_id) const;
|
||||
|
||||
String node_path;
|
||||
ContextPtr context;
|
||||
Stopwatch watch;
|
||||
Poco::Logger * log;
|
||||
|
||||
Block sample;
|
||||
|
||||
NameSet waiting_hosts; /// hosts from task host list
|
||||
NameSet finished_hosts; /// finished hosts from host list
|
||||
NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
|
||||
Strings current_active_hosts; /// Hosts that were in active state at the last check
|
||||
size_t num_hosts_finished = 0;
|
||||
|
||||
/// Save the first detected error and throw it at the end of execution
|
||||
std::unique_ptr<Exception> first_exception;
|
||||
|
||||
Int64 timeout_seconds = 120;
|
||||
bool by_hostname = true;
|
||||
bool throw_on_timeout = true;
|
||||
bool timeout_exceeded = false;
|
||||
};
|
||||
BlockIO getDistributedDDLStatus(
|
||||
const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait = {});
|
||||
|
||||
}
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataStreams/InputStreamFromASTInsertQuery.h>
|
||||
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
|
||||
#include <DataStreams/CountingBlockOutputStream.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -53,6 +53,7 @@
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <Processors/Transforms/MaterializingTransform.h>
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
#include <Processors/Sources/SinkToOutputStream.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -512,9 +513,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
StoragePtr storage = context->executeTableFunction(input_function);
|
||||
auto & input_storage = dynamic_cast<StorageInput &>(*storage);
|
||||
auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr();
|
||||
BlockInputStreamPtr input_stream = std::make_shared<InputStreamFromASTInsertQuery>(
|
||||
auto pipe = getSourceFromFromASTInsertQuery(
|
||||
ast, istr, input_metadata_snapshot->getSampleBlock(), context, input_function);
|
||||
input_storage.setInputStream(input_stream);
|
||||
input_storage.setPipe(std::move(pipe));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -992,8 +993,17 @@ void executeQuery(
|
||||
{
|
||||
if (streams.out)
|
||||
{
|
||||
InputStreamFromASTInsertQuery in(ast, &istr, streams.out->getHeader(), context, nullptr);
|
||||
copyData(in, *streams.out);
|
||||
auto pipe = getSourceFromFromASTInsertQuery(ast, &istr, streams.out->getHeader(), context, nullptr);
|
||||
|
||||
pipeline.init(std::move(pipe));
|
||||
pipeline.resize(1);
|
||||
pipeline.setSinks([&](const Block &, Pipe::StreamType)
|
||||
{
|
||||
return std::make_shared<SinkToOutputStream>(streams.out);
|
||||
});
|
||||
|
||||
auto executor = pipeline.execute();
|
||||
executor->execute(pipeline.getNumThreads());
|
||||
}
|
||||
else if (streams.in)
|
||||
{
|
||||
|
@ -1,21 +1,29 @@
|
||||
#include <Interpreters/join_common.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -220,6 +228,12 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names)
|
||||
return ptrs;
|
||||
}
|
||||
|
||||
ColumnPtr materializeColumn(const Block & block, const String & column_name)
|
||||
{
|
||||
const auto & src_column = block.getByName(column_name).column;
|
||||
return recursiveRemoveLowCardinality(src_column->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
Columns materializeColumns(const Block & block, const Names & names)
|
||||
{
|
||||
Columns materialized;
|
||||
@ -227,8 +241,7 @@ Columns materializeColumns(const Block & block, const Names & names)
|
||||
|
||||
for (const auto & column_name : names)
|
||||
{
|
||||
const auto & src_column = block.getByName(column_name).column;
|
||||
materialized.emplace_back(recursiveRemoveLowCardinality(src_column->convertToFullColumnIfConst()));
|
||||
materialized.emplace_back(materializeColumn(block, column_name));
|
||||
}
|
||||
|
||||
return materialized;
|
||||
@ -294,7 +307,8 @@ ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_nam
|
||||
return key_columns;
|
||||
}
|
||||
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right)
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left,
|
||||
const Block & block_right, const Names & key_names_right)
|
||||
{
|
||||
size_t keys_size = key_names_left.size();
|
||||
|
||||
@ -305,12 +319,38 @@ void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, co
|
||||
|
||||
if (!left_type->equals(*right_type))
|
||||
throw Exception("Type mismatch of columns to JOIN by: "
|
||||
+ key_names_left[i] + " " + left_type->getName() + " at left, "
|
||||
+ key_names_right[i] + " " + right_type->getName() + " at right",
|
||||
ErrorCodes::TYPE_MISMATCH);
|
||||
+ key_names_left[i] + " " + left_type->getName() + " at left, "
|
||||
+ key_names_right[i] + " " + right_type->getName() + " at right",
|
||||
ErrorCodes::TYPE_MISMATCH);
|
||||
}
|
||||
}
|
||||
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const String & condition_name_left,
|
||||
const Block & block_right, const Names & key_names_right, const String & condition_name_right)
|
||||
{
|
||||
checkTypesOfKeys(block_left, key_names_left,block_right,key_names_right);
|
||||
checkTypesOfMasks(block_left, condition_name_left, block_right, condition_name_right);
|
||||
}
|
||||
|
||||
void checkTypesOfMasks(const Block & block_left, const String & condition_name_left,
|
||||
const Block & block_right, const String & condition_name_right)
|
||||
{
|
||||
auto check_cond_column_type = [](const Block & block, const String & col_name)
|
||||
{
|
||||
if (col_name.empty())
|
||||
return;
|
||||
|
||||
DataTypePtr dtype = removeNullable(recursiveRemoveLowCardinality(block.getByName(col_name).type));
|
||||
|
||||
if (!dtype->equals(DataTypeUInt8{}))
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
"Expected logical expression in JOIN ON section, got unexpected column '{}' of type '{}'",
|
||||
col_name, dtype->getName());
|
||||
};
|
||||
check_cond_column_type(block_left, condition_name_left);
|
||||
check_cond_column_type(block_right, condition_name_right);
|
||||
}
|
||||
|
||||
void createMissedColumns(Block & block)
|
||||
{
|
||||
for (size_t i = 0; i < block.columns(); ++i)
|
||||
@ -359,28 +399,80 @@ bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type)
|
||||
return left_type_strict->equals(*right_type_strict);
|
||||
}
|
||||
|
||||
ColumnPtr getColumnAsMask(const Block & block, const String & column_name)
|
||||
{
|
||||
if (column_name.empty())
|
||||
return nullptr;
|
||||
|
||||
const auto & src_col = block.getByName(column_name);
|
||||
|
||||
DataTypePtr col_type = recursiveRemoveLowCardinality(src_col.type);
|
||||
if (isNothing(col_type))
|
||||
return ColumnUInt8::create(block.rows(), 0);
|
||||
|
||||
const auto & join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst());
|
||||
|
||||
if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(join_condition_col.get()))
|
||||
{
|
||||
if (isNothing(assert_cast<const DataTypeNullable &>(*col_type).getNestedType()))
|
||||
return ColumnUInt8::create(block.rows(), 0);
|
||||
|
||||
/// Return nested column with NULL set to false
|
||||
const auto & nest_col = assert_cast<const ColumnUInt8 &>(nullable_col->getNestedColumn());
|
||||
const auto & null_map = nullable_col->getNullMapColumn();
|
||||
|
||||
auto res = ColumnUInt8::create(nullable_col->size(), 0);
|
||||
for (size_t i = 0, sz = nullable_col->size(); i < sz; ++i)
|
||||
res->getData()[i] = !null_map.getData()[i] && nest_col.getData()[i];
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return join_condition_col;
|
||||
}
|
||||
|
||||
|
||||
void splitAdditionalColumns(const Names & key_names, const Block & sample_block, Block & block_keys, Block & block_others)
|
||||
{
|
||||
block_others = materializeBlock(sample_block);
|
||||
|
||||
for (const String & column_name : key_names)
|
||||
{
|
||||
/// Extract right keys with correct keys order. There could be the same key names.
|
||||
if (!block_keys.has(column_name))
|
||||
{
|
||||
auto & col = block_others.getByName(column_name);
|
||||
block_keys.insert(col);
|
||||
block_others.erase(column_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
|
||||
const Block & result_sample_block_)
|
||||
const Block & result_sample_block_, const Names & key_names_left_, const Names & key_names_right_)
|
||||
: saved_block_sample(saved_block_sample_)
|
||||
, result_sample_block(materializeBlock(result_sample_block_))
|
||||
, key_names_left(key_names_left_.empty() ? table_join.keyNamesLeft() : key_names_left_)
|
||||
, key_names_right(key_names_right_.empty() ? table_join.keyNamesRight() : key_names_right_)
|
||||
{
|
||||
std::vector<String> tmp;
|
||||
Block right_table_keys;
|
||||
Block sample_block_with_columns_to_add;
|
||||
table_join.splitAdditionalColumns(right_sample_block, right_table_keys, sample_block_with_columns_to_add);
|
||||
|
||||
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys,
|
||||
sample_block_with_columns_to_add);
|
||||
Block required_right_keys = table_join.getRequiredRightKeys(right_table_keys, tmp);
|
||||
|
||||
std::unordered_map<size_t, size_t> left_to_right_key_remap;
|
||||
|
||||
if (table_join.hasUsing())
|
||||
{
|
||||
for (size_t i = 0; i < table_join.keyNamesLeft().size(); ++i)
|
||||
for (size_t i = 0; i < key_names_left.size(); ++i)
|
||||
{
|
||||
const String & left_key_name = table_join.keyNamesLeft()[i];
|
||||
const String & right_key_name = table_join.keyNamesRight()[i];
|
||||
const String & left_key_name = key_names_left[i];
|
||||
const String & right_key_name = key_names_right[i];
|
||||
|
||||
size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
|
||||
size_t right_key_pos = saved_block_sample.getPositionByName(right_key_name);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Interpreters/IJoin.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
@ -12,6 +13,7 @@ struct ColumnWithTypeAndName;
|
||||
class TableJoin;
|
||||
class IColumn;
|
||||
using ColumnRawPtrs = std::vector<const IColumn *>;
|
||||
using UInt8ColumnDataPtr = const ColumnUInt8::Container *;
|
||||
|
||||
namespace JoinCommon
|
||||
{
|
||||
@ -22,6 +24,7 @@ void convertColumnsToNullable(Block & block, size_t starting_pos = 0);
|
||||
void removeColumnNullability(ColumnWithTypeAndName & column);
|
||||
void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_column);
|
||||
ColumnPtr emptyNotNullableClone(const ColumnPtr & column);
|
||||
ColumnPtr materializeColumn(const Block & block, const String & name);
|
||||
Columns materializeColumns(const Block & block, const Names & names);
|
||||
ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
|
||||
ColumnRawPtrs getRawPointers(const Columns & columns);
|
||||
@ -31,8 +34,17 @@ void restoreLowCardinalityInplace(Block & block);
|
||||
|
||||
ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
|
||||
|
||||
/// Throw an exception if blocks have different types of key columns. Compare up to Nullability.
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right);
|
||||
/// Throw an exception if join condition column is not UIint8
|
||||
void checkTypesOfMasks(const Block & block_left, const String & condition_name_left,
|
||||
const Block & block_right, const String & condition_name_right);
|
||||
|
||||
/// Throw an exception if blocks have different types of key columns . Compare up to Nullability.
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left,
|
||||
const Block & block_right, const Names & key_names_right);
|
||||
|
||||
/// Check both keys and conditions
|
||||
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const String & condition_name_left,
|
||||
const Block & block_right, const Names & key_names_right, const String & condition_name_right);
|
||||
|
||||
void createMissedColumns(Block & block);
|
||||
void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block);
|
||||
@ -41,6 +53,12 @@ void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count);
|
||||
|
||||
bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type);
|
||||
|
||||
/// Return mask array of type ColumnUInt8 for specified column. Source should have type UInt8 or Nullable(UInt8).
|
||||
ColumnPtr getColumnAsMask(const Block & block, const String & column_name);
|
||||
|
||||
/// Split key and other columns by keys name list
|
||||
void splitAdditionalColumns(const Names & key_names, const Block & sample_block, Block & block_keys, Block & block_others);
|
||||
|
||||
void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
|
||||
|
||||
}
|
||||
@ -50,7 +68,7 @@ class NotJoined
|
||||
{
|
||||
public:
|
||||
NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
|
||||
const Block & result_sample_block_);
|
||||
const Block & result_sample_block_, const Names & key_names_left_ = {}, const Names & key_names_right_ = {});
|
||||
|
||||
void correctLowcardAndNullability(MutableColumns & columns_right);
|
||||
void addLeftColumns(Block & block, size_t rows_added) const;
|
||||
@ -61,6 +79,9 @@ protected:
|
||||
Block saved_block_sample;
|
||||
Block result_sample_block;
|
||||
|
||||
Names key_names_left;
|
||||
Names key_names_right;
|
||||
|
||||
~NotJoined() = default;
|
||||
|
||||
private:
|
||||
|
@ -370,7 +370,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
|
||||
if (!written && 0 == strcmp(name.c_str(), "tupleElement"))
|
||||
{
|
||||
// fuzzer sometimes may inserts tupleElement() created from ASTLiteral:
|
||||
// fuzzer sometimes may insert tupleElement() created from ASTLiteral:
|
||||
//
|
||||
// Function_tupleElement, 0xx
|
||||
// -ExpressionList_, 0xx
|
||||
|
@ -72,12 +72,16 @@ public:
|
||||
size_t getCurrentUnitNumber() const { return current_unit_number; }
|
||||
void setCurrentUnitNumber(size_t current_unit_number_) { current_unit_number = current_unit_number_; }
|
||||
|
||||
void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
|
||||
|
||||
protected:
|
||||
ColumnMappingPtr column_mapping{};
|
||||
|
||||
private:
|
||||
/// Number of currently parsed chunk (if parallel parsing is enabled)
|
||||
size_t current_unit_number = 0;
|
||||
|
||||
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -394,7 +394,7 @@ public:
|
||||
pushData({.chunk = std::move(chunk), .exception = {}});
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE push(std::exception_ptr exception)
|
||||
void ALWAYS_INLINE pushException(std::exception_ptr exception)
|
||||
{
|
||||
pushData({.chunk = {}, .exception = std::move(exception)});
|
||||
}
|
||||
|
@ -172,7 +172,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto
|
||||
String query_string = formattedAST(query);
|
||||
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage);
|
||||
pool, std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage);
|
||||
|
||||
return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read);
|
||||
}
|
||||
|
@ -7,6 +7,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Reads all data into queue.
|
||||
* After all data has been read - output it in the same order.
|
||||
*/
|
||||
class QueueBuffer : public IAccumulatingTransform
|
||||
{
|
||||
private:
|
||||
|
@ -2,7 +2,8 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/inplaceBlockConversions.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
@ -128,31 +129,32 @@ static MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read,
|
||||
}
|
||||
|
||||
|
||||
AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(
|
||||
const BlockInputStreamPtr & input,
|
||||
AddingDefaultsTransform::AddingDefaultsTransform(
|
||||
const Block & header,
|
||||
const ColumnsDescription & columns_,
|
||||
IInputFormat & input_format_,
|
||||
ContextPtr context_)
|
||||
: columns(columns_)
|
||||
: ISimpleTransform(header, header, true)
|
||||
, columns(columns_)
|
||||
, column_defaults(columns.getDefaults())
|
||||
, input_format(input_format_)
|
||||
, context(context_)
|
||||
{
|
||||
children.push_back(input);
|
||||
header = input->getHeader();
|
||||
}
|
||||
|
||||
|
||||
Block AddingDefaultsBlockInputStream::readImpl()
|
||||
void AddingDefaultsTransform::transform(Chunk & chunk)
|
||||
{
|
||||
Block res = children.back()->read();
|
||||
if (!res)
|
||||
return res;
|
||||
|
||||
if (column_defaults.empty())
|
||||
return res;
|
||||
return;
|
||||
|
||||
const BlockMissingValues & block_missing_values = children.back()->getMissingValues();
|
||||
const BlockMissingValues & block_missing_values = input_format.getMissingValues();
|
||||
if (block_missing_values.empty())
|
||||
return res;
|
||||
return;
|
||||
|
||||
const auto & header = getOutputPort().getHeader();
|
||||
size_t num_rows = chunk.getNumRows();
|
||||
auto res = header.cloneWithColumns(chunk.detachColumns());
|
||||
|
||||
/// res block already has all columns values, with default value for type
|
||||
/// (not value specified in table). We identify which columns we need to
|
||||
@ -170,7 +172,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
|
||||
}
|
||||
|
||||
if (!evaluate_block.columns())
|
||||
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
|
||||
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), num_rows), std::make_shared<DataTypeUInt8>(), "_dummy"});
|
||||
|
||||
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
|
||||
if (dag)
|
||||
@ -224,7 +226,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
|
||||
res.setColumns(std::move(mutation));
|
||||
}
|
||||
|
||||
return res;
|
||||
chunk.setColumns(res.getColumns(), num_rows);
|
||||
}
|
||||
|
||||
}
|
@ -1,31 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Processors/ISimpleTransform.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IInputFormat;
|
||||
|
||||
/// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream.
|
||||
class AddingDefaultsBlockInputStream : public IBlockInputStream
|
||||
class AddingDefaultsTransform : public ISimpleTransform
|
||||
{
|
||||
public:
|
||||
AddingDefaultsBlockInputStream(
|
||||
const BlockInputStreamPtr & input,
|
||||
AddingDefaultsTransform(
|
||||
const Block & header,
|
||||
const ColumnsDescription & columns_,
|
||||
IInputFormat & input_format_,
|
||||
ContextPtr context_);
|
||||
|
||||
String getName() const override { return "AddingDefaults"; }
|
||||
Block getHeader() const override { return header; }
|
||||
String getName() const override { return "AddingDefaultsTransform"; }
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
void transform(Chunk & chunk) override;
|
||||
|
||||
private:
|
||||
Block header;
|
||||
const ColumnsDescription columns;
|
||||
const ColumnDefaults column_defaults;
|
||||
IInputFormat & input_format;
|
||||
ContextPtr context;
|
||||
};
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/FieldVisitorsAccurateComparison.h>
|
||||
#include <common/arithmeticOverflow.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
@ -196,6 +197,16 @@ WindowTransform::WindowTransform(const Block & input_header_,
|
||||
, input_header(input_header_)
|
||||
, window_description(window_description_)
|
||||
{
|
||||
// Materialize all columns in header, because we materialize all columns
|
||||
// in chunks and it's convenient if they match.
|
||||
auto input_columns = input_header.getColumns();
|
||||
for (auto & column : input_columns)
|
||||
{
|
||||
column = std::move(column)->convertToFullColumnIfConst();
|
||||
}
|
||||
input_header.setColumns(std::move(input_columns));
|
||||
|
||||
// Initialize window function workspaces.
|
||||
workspaces.reserve(functions.size());
|
||||
for (const auto & f : functions)
|
||||
{
|
||||
@ -850,6 +861,8 @@ void WindowTransform::updateAggregationState()
|
||||
assert(prev_frame_start <= prev_frame_end);
|
||||
assert(prev_frame_start <= frame_start);
|
||||
assert(prev_frame_end <= frame_end);
|
||||
assert(partition_start <= frame_start);
|
||||
assert(frame_end <= partition_end);
|
||||
|
||||
// We might have to reset aggregation state and/or add some rows to it.
|
||||
// Figure out what to do.
|
||||
@ -965,10 +978,37 @@ void WindowTransform::writeOutCurrentRow()
|
||||
}
|
||||
}
|
||||
|
||||
static void assertSameColumns(const Columns & left_all,
|
||||
const Columns & right_all)
|
||||
{
|
||||
assert(left_all.size() == right_all.size());
|
||||
|
||||
for (size_t i = 0; i < left_all.size(); ++i)
|
||||
{
|
||||
const auto * left_column = left_all[i].get();
|
||||
const auto * right_column = right_all[i].get();
|
||||
|
||||
assert(left_column);
|
||||
assert(right_column);
|
||||
|
||||
assert(typeid(*left_column).hash_code()
|
||||
== typeid(*right_column).hash_code());
|
||||
|
||||
if (isColumnConst(*left_column))
|
||||
{
|
||||
Field left_value = assert_cast<const ColumnConst &>(*left_column).getField();
|
||||
Field right_value = assert_cast<const ColumnConst &>(*right_column).getField();
|
||||
|
||||
assert(left_value == right_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WindowTransform::appendChunk(Chunk & chunk)
|
||||
{
|
||||
// fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
|
||||
// input_is_finished);
|
||||
// fmt::print(stderr, "chunk structure '{}'\n", chunk.dumpStructure());
|
||||
|
||||
// First, prepare the new input block and add it to the queue. We might not
|
||||
// have it if it's end of data, though.
|
||||
@ -984,28 +1024,42 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
|
||||
blocks.push_back({});
|
||||
auto & block = blocks.back();
|
||||
|
||||
// Use the number of rows from the Chunk, because it is correct even in
|
||||
// the case where the Chunk has no columns. Not sure if this actually
|
||||
// happens, because even in the case of `count() over ()` we have a dummy
|
||||
// input column.
|
||||
block.rows = chunk.getNumRows();
|
||||
block.input_columns = chunk.detachColumns();
|
||||
|
||||
// If we have a (logically) constant column, some Chunks will have a
|
||||
// Const column for it, and some -- materialized. Such difference is
|
||||
// generated by e.g. MergingSortedAlgorithm, which mostly materializes
|
||||
// the constant ORDER BY columns, but in some obscure cases passes them
|
||||
// through, unmaterialized. This mix is a pain to work with in Window
|
||||
// Transform, because we have to compare columns across blocks, when e.g.
|
||||
// searching for peer group boundaries, and each of the four combinations
|
||||
// of const and materialized requires different code.
|
||||
// Another problem with Const columns is that the aggregate functions
|
||||
// can't work with them, so we have to materialize them like the
|
||||
// Aggregator does.
|
||||
// Just materialize everything.
|
||||
auto columns = chunk.detachColumns();
|
||||
for (auto & column : columns)
|
||||
column = std::move(column)->convertToFullColumnIfConst();
|
||||
block.input_columns = std::move(columns);
|
||||
|
||||
// Initialize output columns.
|
||||
for (auto & ws : workspaces)
|
||||
{
|
||||
// Aggregate functions can't work with constant columns, so we have to
|
||||
// materialize them like the Aggregator does.
|
||||
for (const auto column_index : ws.argument_column_indices)
|
||||
{
|
||||
block.input_columns[column_index]
|
||||
= std::move(block.input_columns[column_index])
|
||||
->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
block.output_columns.push_back(ws.aggregate_function->getReturnType()
|
||||
->createColumn());
|
||||
block.output_columns.back()->reserve(block.rows);
|
||||
}
|
||||
|
||||
// As a debugging aid, assert that all chunks have the same C++ type of
|
||||
// columns, that also matches the input header, because we often have to
|
||||
// work across chunks.
|
||||
assertSameColumns(input_header.getColumns(), block.input_columns);
|
||||
}
|
||||
|
||||
// Start the calculations. First, advance the partition end.
|
||||
@ -1475,12 +1529,21 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
|
||||
return;
|
||||
}
|
||||
|
||||
if (!getLeastSupertype({argument_types[0], argument_types[2]}))
|
||||
const auto supertype = getLeastSupertype({argument_types[0], argument_types[2]});
|
||||
if (!supertype)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The default value type '{}' is not convertible to the argument type '{}'",
|
||||
argument_types[2]->getName(),
|
||||
argument_types[0]->getName());
|
||||
"There is no supertype for the argument type '{}' and the default value type '{}'",
|
||||
argument_types[0]->getName(),
|
||||
argument_types[2]->getName());
|
||||
}
|
||||
if (!argument_types[0]->equals(*supertype))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The supertype '{}' for the argument type '{}' and the default value type '{}' is not the same as the argument type",
|
||||
supertype->getName(),
|
||||
argument_types[0]->getName(),
|
||||
argument_types[2]->getName());
|
||||
}
|
||||
|
||||
if (argument_types.size() > 3)
|
||||
@ -1491,8 +1554,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
|
||||
}
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{ return argument_types[0]; }
|
||||
DataTypePtr getReturnType() const override { return argument_types[0]; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
@ -1534,9 +1596,13 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
|
||||
if (argument_types.size() > 2)
|
||||
{
|
||||
// Column with default values is specified.
|
||||
to.insertFrom(*current_block.input_columns[
|
||||
workspace.argument_column_indices[2]],
|
||||
transform->current_row.row);
|
||||
// The conversion through Field is inefficient, but we accept
|
||||
// subtypes of the argument type as a default value (for convenience),
|
||||
// and it's a pain to write conversion that respects ColumnNothing
|
||||
// and ColumnConst and so on.
|
||||
const IColumn & default_column = *current_block.input_columns[
|
||||
workspace.argument_column_indices[2]].get();
|
||||
to.insert(default_column[transform->current_row.row]);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1573,40 +1639,49 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
|
||||
// to a (rows between unbounded preceding and unbounded following) frame,
|
||||
// instead of adding separate logic for them.
|
||||
|
||||
factory.registerFunction("rank", [](const std::string & name,
|
||||
const AggregateFunctionProperties properties = {
|
||||
// By default, if an aggregate function has a null argument, it will be
|
||||
// replaced with AggregateFunctionNothing. We don't need this behavior
|
||||
// e.g. for lagInFrame(number, 1, null).
|
||||
.returns_default_when_only_null = true,
|
||||
// This probably doesn't make any difference for window functions because
|
||||
// it is an Aggregator-specific setting.
|
||||
.is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("rank", {[](const std::string & name,
|
||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
return std::make_shared<WindowFunctionRank>(name, argument_types,
|
||||
parameters);
|
||||
});
|
||||
}, properties});
|
||||
|
||||
factory.registerFunction("dense_rank", [](const std::string & name,
|
||||
factory.registerFunction("dense_rank", {[](const std::string & name,
|
||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
return std::make_shared<WindowFunctionDenseRank>(name, argument_types,
|
||||
parameters);
|
||||
});
|
||||
}, properties});
|
||||
|
||||
factory.registerFunction("row_number", [](const std::string & name,
|
||||
factory.registerFunction("row_number", {[](const std::string & name,
|
||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
|
||||
parameters);
|
||||
});
|
||||
}, properties});
|
||||
|
||||
factory.registerFunction("lagInFrame", [](const std::string & name,
|
||||
factory.registerFunction("lagInFrame", {[](const std::string & name,
|
||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
return std::make_shared<WindowFunctionLagLeadInFrame<false>>(
|
||||
name, argument_types, parameters);
|
||||
});
|
||||
}, properties});
|
||||
|
||||
factory.registerFunction("leadInFrame", [](const std::string & name,
|
||||
factory.registerFunction("leadInFrame", {[](const std::string & name,
|
||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
return std::make_shared<WindowFunctionLagLeadInFrame<true>>(
|
||||
name, argument_types, parameters);
|
||||
});
|
||||
}, properties});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,16 @@
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterSetQuery.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <IO/ConcatReadBuffer.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <DataStreams/InputStreamFromASTInsertQuery.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -20,7 +23,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
|
||||
Pipe getSourceFromFromASTInsertQuery(
|
||||
const ASTPtr & ast,
|
||||
ReadBuffer * input_buffer_tail_part,
|
||||
const Block & header,
|
||||
@ -42,7 +45,7 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
|
||||
|
||||
/// Data could be in parsed (ast_insert_query.data) and in not parsed yet (input_buffer_tail_part) part of query.
|
||||
|
||||
input_buffer_ast_part = std::make_unique<ReadBufferFromMemory>(
|
||||
auto input_buffer_ast_part = std::make_unique<ReadBufferFromMemory>(
|
||||
ast_insert_query->data, ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0);
|
||||
|
||||
ConcatReadBuffer::ReadBuffers buffers;
|
||||
@ -56,9 +59,10 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
|
||||
* - because 'query.data' could refer to memory piece, used as buffer for 'input_buffer_tail_part'.
|
||||
*/
|
||||
|
||||
input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
|
||||
auto input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
|
||||
|
||||
res_stream = context->getInputFormat(format, *input_buffer_contacenated, header, context->getSettings().max_insert_block_size);
|
||||
auto source = FormatFactory::instance().getInput(format, *input_buffer_contacenated, header, context, context->getSettings().max_insert_block_size);
|
||||
Pipe pipe(source);
|
||||
|
||||
if (context->getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function)
|
||||
{
|
||||
@ -66,8 +70,18 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
|
||||
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
if (columns.hasDefaults())
|
||||
res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, columns, context);
|
||||
{
|
||||
pipe.addSimpleTransform([&](const Block & cur_header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *source, context);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
source->addBuffer(std::move(input_buffer_ast_part));
|
||||
source->addBuffer(std::move(input_buffer_contacenated));
|
||||
|
||||
return pipe;
|
||||
}
|
||||
|
||||
}
|
26
src/Processors/Transforms/getSourceFromFromASTInsertQuery.h
Normal file
26
src/Processors/Transforms/getSourceFromFromASTInsertQuery.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Prepares a pipe which produce data containing in INSERT query
|
||||
* Head of inserting data could be stored in INSERT ast directly
|
||||
* Remaining (tail) data could be stored in input_buffer_tail_part
|
||||
*/
|
||||
|
||||
class Pipe;
|
||||
|
||||
Pipe getSourceFromFromASTInsertQuery(
|
||||
const ASTPtr & ast,
|
||||
ReadBuffer * input_buffer_tail_part,
|
||||
const Block & header,
|
||||
ContextPtr context,
|
||||
const ASTPtr & input_function);
|
||||
|
||||
}
|
@ -139,6 +139,7 @@ SRCS(
|
||||
Sources/SinkToOutputStream.cpp
|
||||
Sources/SourceFromInputStream.cpp
|
||||
Sources/SourceWithProgress.cpp
|
||||
Transforms/AddingDefaultsTransform.cpp
|
||||
Transforms/AddingSelectorTransform.cpp
|
||||
Transforms/AggregatingInOrderTransform.cpp
|
||||
Transforms/AggregatingTransform.cpp
|
||||
@ -165,6 +166,7 @@ SRCS(
|
||||
Transforms/SortingTransform.cpp
|
||||
Transforms/TotalsHavingTransform.cpp
|
||||
Transforms/WindowTransform.cpp
|
||||
Transforms/getSourceFromFromASTInsertQuery.cpp
|
||||
printPipeline.cpp
|
||||
|
||||
)
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/SettingsChanges.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
@ -20,6 +20,10 @@
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Server/IServer.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Poco/FileStream.h>
|
||||
@ -547,7 +551,8 @@ namespace
|
||||
|
||||
std::optional<ReadBufferFromCallback> read_buffer;
|
||||
std::optional<WriteBufferFromString> write_buffer;
|
||||
BlockInputStreamPtr block_input_stream;
|
||||
std::unique_ptr<QueryPipeline> pipeline;
|
||||
std::unique_ptr<PullingPipelineExecutor> pipeline_executor;
|
||||
BlockOutputStreamPtr block_output_stream;
|
||||
bool need_input_data_from_insert_query = true;
|
||||
bool need_input_data_from_query_info = true;
|
||||
@ -755,16 +760,16 @@ namespace
|
||||
throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR);
|
||||
input_function_is_used = true;
|
||||
initializeBlockInputStream(input_storage->getInMemoryMetadataPtr()->getSampleBlock());
|
||||
block_input_stream->readPrefix();
|
||||
});
|
||||
|
||||
query_context->setInputBlocksReaderCallback([this](ContextPtr context) -> Block
|
||||
{
|
||||
if (context != query_context)
|
||||
throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR);
|
||||
auto block = block_input_stream->read();
|
||||
if (!block)
|
||||
block_input_stream->readSuffix();
|
||||
|
||||
Block block;
|
||||
while (!block && pipeline_executor->pull(block));
|
||||
|
||||
return block;
|
||||
});
|
||||
|
||||
@ -797,13 +802,15 @@ namespace
|
||||
/// So we mustn't touch the input stream from other thread.
|
||||
initializeBlockInputStream(io.out->getHeader());
|
||||
|
||||
block_input_stream->readPrefix();
|
||||
io.out->writePrefix();
|
||||
|
||||
while (auto block = block_input_stream->read())
|
||||
io.out->write(block);
|
||||
Block block;
|
||||
while (pipeline_executor->pull(block))
|
||||
{
|
||||
if (block)
|
||||
io.out->write(block);
|
||||
}
|
||||
|
||||
block_input_stream->readSuffix();
|
||||
io.out->writeSuffix();
|
||||
}
|
||||
|
||||
@ -866,9 +873,11 @@ namespace
|
||||
return {nullptr, 0}; /// no more input data
|
||||
});
|
||||
|
||||
assert(!block_input_stream);
|
||||
block_input_stream = query_context->getInputFormat(
|
||||
input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size);
|
||||
assert(!pipeline);
|
||||
pipeline = std::make_unique<QueryPipeline>();
|
||||
auto source = FormatFactory::instance().getInput(
|
||||
input_format, *read_buffer, header, query_context, query_context->getSettings().max_insert_block_size);
|
||||
pipeline->init(Pipe(source));
|
||||
|
||||
/// Add default values if necessary.
|
||||
if (ast)
|
||||
@ -881,10 +890,17 @@ namespace
|
||||
StoragePtr storage = DatabaseCatalog::instance().getTable(table_id, query_context);
|
||||
const auto & columns = storage->getInMemoryMetadataPtr()->getColumns();
|
||||
if (!columns.empty())
|
||||
block_input_stream = std::make_shared<AddingDefaultsBlockInputStream>(block_input_stream, columns, query_context);
|
||||
{
|
||||
pipeline->addSimpleTransform([&](const Block & cur_header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *source, query_context);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pipeline_executor = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
}
|
||||
|
||||
void Call::createExternalTables()
|
||||
@ -1196,7 +1212,8 @@ namespace
|
||||
void Call::close()
|
||||
{
|
||||
responder.reset();
|
||||
block_input_stream.reset();
|
||||
pipeline_executor.reset();
|
||||
pipeline.reset();
|
||||
block_output_stream.reset();
|
||||
read_buffer.reset();
|
||||
write_buffer.reset();
|
||||
|
@ -1026,7 +1026,17 @@ bool TCPHandler::receivePacket()
|
||||
return false;
|
||||
|
||||
case Protocol::Client::Cancel:
|
||||
{
|
||||
/// For testing connection collector.
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
if (settings.sleep_in_receive_cancel_ms.totalMilliseconds())
|
||||
{
|
||||
std::chrono::milliseconds ms(settings.sleep_in_receive_cancel_ms.totalMilliseconds());
|
||||
std::this_thread::sleep_for(ms);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
case Protocol::Client::Hello:
|
||||
receiveUnexpectedHello();
|
||||
@ -1063,6 +1073,13 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked()
|
||||
if (packet_type == Protocol::Client::Cancel)
|
||||
{
|
||||
state.is_cancelled = true;
|
||||
/// For testing connection collector.
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
if (settings.sleep_in_receive_cancel_ms.totalMilliseconds())
|
||||
{
|
||||
std::chrono::milliseconds ms(settings.sleep_in_receive_cancel_ms.totalMilliseconds());
|
||||
std::this_thread::sleep_for(ms);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
else
|
||||
@ -1461,6 +1478,16 @@ bool TCPHandler::isQueryCancelled()
|
||||
throw NetException("Unexpected packet Cancel received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
|
||||
LOG_INFO(log, "Query was cancelled.");
|
||||
state.is_cancelled = true;
|
||||
/// For testing connection collector.
|
||||
{
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
if (settings.sleep_in_receive_cancel_ms.totalMilliseconds())
|
||||
{
|
||||
std::chrono::milliseconds ms(settings.sleep_in_receive_cancel_ms.totalMilliseconds());
|
||||
std::this_thread::sleep_for(ms);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
default:
|
||||
|
@ -506,12 +506,15 @@ bool ColumnsDescription::hasColumnOrSubcolumn(GetFlags flags, const String & col
|
||||
|
||||
void ColumnsDescription::addSubcolumnsToList(NamesAndTypesList & source_list) const
|
||||
{
|
||||
NamesAndTypesList subcolumns_list;
|
||||
for (const auto & col : source_list)
|
||||
{
|
||||
auto range = subcolumns.get<1>().equal_range(col.name);
|
||||
if (range.first != range.second)
|
||||
source_list.insert(source_list.end(), range.first, range.second);
|
||||
subcolumns_list.insert(subcolumns_list.end(), range.first, range.second);
|
||||
}
|
||||
|
||||
source_list.splice(source_list.end(), std::move(subcolumns_list));
|
||||
}
|
||||
|
||||
NamesAndTypesList ColumnsDescription::getAllWithSubcolumns() const
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <DataStreams/ConvertingBlockInputStream.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
@ -902,50 +903,78 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
class DirectoryMonitorBlockInputStream : public IBlockInputStream
|
||||
class DirectoryMonitorSource : public SourceWithProgress
|
||||
{
|
||||
public:
|
||||
explicit DirectoryMonitorBlockInputStream(const String & file_name)
|
||||
: in(file_name)
|
||||
, decompressing_in(in)
|
||||
, block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION)
|
||||
, log{&Poco::Logger::get("DirectoryMonitorBlockInputStream")}
|
||||
{
|
||||
readDistributedHeader(in, log);
|
||||
|
||||
block_in.readPrefix();
|
||||
first_block = block_in.read();
|
||||
header = first_block.cloneEmpty();
|
||||
struct Data
|
||||
{
|
||||
std::unique_ptr<ReadBufferFromFile> in;
|
||||
std::unique_ptr<CompressedReadBuffer> decompressing_in;
|
||||
std::unique_ptr<NativeBlockInputStream> block_in;
|
||||
|
||||
Poco::Logger * log = nullptr;
|
||||
|
||||
Block first_block;
|
||||
|
||||
explicit Data(const String & file_name)
|
||||
{
|
||||
in = std::make_unique<ReadBufferFromFile>(file_name);
|
||||
decompressing_in = std::make_unique<CompressedReadBuffer>(*in);
|
||||
block_in = std::make_unique<NativeBlockInputStream>(*decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
|
||||
log = &Poco::Logger::get("DirectoryMonitorSource");
|
||||
|
||||
readDistributedHeader(*in, log);
|
||||
|
||||
block_in->readPrefix();
|
||||
first_block = block_in->read();
|
||||
}
|
||||
|
||||
Data(Data &&) = default;
|
||||
};
|
||||
|
||||
explicit DirectoryMonitorSource(const String & file_name)
|
||||
: DirectoryMonitorSource(Data(file_name))
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "DirectoryMonitor"; }
|
||||
explicit DirectoryMonitorSource(Data data_)
|
||||
: SourceWithProgress(data_.first_block.cloneEmpty())
|
||||
, data(std::move(data_))
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "DirectoryMonitorSource"; }
|
||||
|
||||
protected:
|
||||
Block getHeader() const override { return header; }
|
||||
Block readImpl() override
|
||||
Chunk generate() override
|
||||
{
|
||||
if (first_block)
|
||||
return std::move(first_block);
|
||||
if (data.first_block)
|
||||
{
|
||||
size_t num_rows = data.first_block.rows();
|
||||
Chunk res(data.first_block.getColumns(), num_rows);
|
||||
data.first_block.clear();
|
||||
return res;
|
||||
}
|
||||
|
||||
return block_in.read();
|
||||
auto block = data.block_in->read();
|
||||
if (!block)
|
||||
{
|
||||
data.block_in->readSuffix();
|
||||
return {};
|
||||
}
|
||||
|
||||
size_t num_rows = block.rows();
|
||||
return Chunk(block.getColumns(), num_rows);
|
||||
}
|
||||
|
||||
void readSuffix() override { block_in.readSuffix(); }
|
||||
|
||||
private:
|
||||
ReadBufferFromFile in;
|
||||
CompressedReadBuffer decompressing_in;
|
||||
NativeBlockInputStream block_in;
|
||||
|
||||
Block first_block;
|
||||
Block header;
|
||||
|
||||
Poco::Logger * log;
|
||||
Data data;
|
||||
};
|
||||
|
||||
BlockInputStreamPtr StorageDistributedDirectoryMonitor::createStreamFromFile(const String & file_name)
|
||||
ProcessorPtr StorageDistributedDirectoryMonitor::createSourceFromFile(const String & file_name)
|
||||
{
|
||||
return std::make_shared<DirectoryMonitorBlockInputStream>(file_name);
|
||||
return std::make_shared<DirectoryMonitorSource>(file_name);
|
||||
}
|
||||
|
||||
bool StorageDistributedDirectoryMonitor::addAndSchedule(size_t file_size, size_t ms)
|
||||
|
@ -21,6 +21,9 @@ class StorageDistributed;
|
||||
class ActionBlocker;
|
||||
class BackgroundSchedulePool;
|
||||
|
||||
class IProcessor;
|
||||
using ProcessorPtr = std::shared_ptr<IProcessor>;
|
||||
|
||||
/** Details of StorageDistributed.
|
||||
* This type is not designed for standalone use.
|
||||
*/
|
||||
@ -45,7 +48,7 @@ public:
|
||||
|
||||
void shutdownAndDropAllData();
|
||||
|
||||
static BlockInputStreamPtr createStreamFromFile(const String & file_name);
|
||||
static ProcessorPtr createSourceFromFile(const String & file_name);
|
||||
|
||||
/// For scheduling via DistributedBlockOutputStream
|
||||
bool addAndSchedule(size_t file_size, size_t ms);
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <librdkafka/rdkafka.h>
|
||||
@ -746,10 +747,11 @@ void registerStorageKafka(StorageFactory & factory)
|
||||
#undef CHECK_KAFKA_STORAGE_ARGUMENT
|
||||
|
||||
auto num_consumers = kafka_settings->kafka_num_consumers.value;
|
||||
auto physical_cpu_cores = getNumberOfPhysicalCPUCores();
|
||||
|
||||
if (num_consumers > 16)
|
||||
if (num_consumers > physical_cpu_cores)
|
||||
{
|
||||
throw Exception("Number of consumers can not be bigger than 16", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be bigger than {}", physical_cpu_cores);
|
||||
}
|
||||
else if (num_consumers < 1)
|
||||
{
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
#include <DataStreams/RemoteBlockInputStream.h>
|
||||
#include <DataStreams/RemoteQueryExecutor.h>
|
||||
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
@ -57,6 +57,7 @@
|
||||
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
||||
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
|
||||
#include <Processors/Sources/NullSource.h>
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <Processors/NullSink.h>
|
||||
|
||||
@ -739,9 +740,10 @@ QueryPipelinePtr StorageDistributed::distributedWrite(const ASTInsertQuery & que
|
||||
"Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// INSERT SELECT query returns empty block
|
||||
auto in_stream = std::make_shared<RemoteBlockInputStream>(std::move(connections), new_query_str, Block{}, local_context);
|
||||
auto remote_query_executor
|
||||
= std::make_shared<RemoteQueryExecutor>(shard_info.pool, std::move(connections), new_query_str, Block{}, local_context);
|
||||
pipelines.emplace_back(std::make_unique<QueryPipeline>());
|
||||
pipelines.back()->init(Pipe(std::make_shared<SourceFromInputStream>(std::move(in_stream))));
|
||||
pipelines.back()->init(Pipe(std::make_shared<RemoteSource>(remote_query_executor, false, settings.async_socket_for_remote)));
|
||||
pipelines.back()->setSinks([](const Block & header, QueryPipeline::StreamType) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<EmptySink>(header);
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -34,6 +34,7 @@
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <Processors/Sources/NullSource.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
@ -186,7 +187,7 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us
|
||||
throw Exception("Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
|
||||
auto & first_path = paths[0];
|
||||
Block header = StorageDistributedDirectoryMonitor::createStreamFromFile(first_path)->getHeader();
|
||||
Block header = StorageDistributedDirectoryMonitor::createSourceFromFile(first_path)->getOutputs().front().getHeader();
|
||||
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
auto columns = ColumnsDescription(header.getNamesAndTypesList());
|
||||
@ -347,7 +348,9 @@ public:
|
||||
/// Special case for distributed format. Defaults are not needed here.
|
||||
if (storage->format_name == "Distributed")
|
||||
{
|
||||
reader = StorageDistributedDirectoryMonitor::createStreamFromFile(current_path);
|
||||
pipeline = std::make_unique<QueryPipeline>();
|
||||
pipeline->init(Pipe(StorageDistributedDirectoryMonitor::createSourceFromFile(current_path)));
|
||||
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -385,24 +388,31 @@ public:
|
||||
auto format = FormatFactory::instance().getInput(
|
||||
storage->format_name, *read_buf, get_block_for_format(), context, max_block_size, storage->format_settings);
|
||||
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(format);
|
||||
pipeline = std::make_unique<QueryPipeline>();
|
||||
pipeline->init(Pipe(format));
|
||||
|
||||
if (columns_description.hasDefaults())
|
||||
reader = std::make_shared<AddingDefaultsBlockInputStream>(reader, columns_description, context);
|
||||
{
|
||||
pipeline->addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *format, context);
|
||||
});
|
||||
}
|
||||
|
||||
reader->readPrefix();
|
||||
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
}
|
||||
|
||||
if (auto res = reader->read())
|
||||
Chunk chunk;
|
||||
if (reader->pull(chunk))
|
||||
{
|
||||
Columns columns = res.getColumns();
|
||||
UInt64 num_rows = res.rows();
|
||||
//Columns columns = res.getColumns();
|
||||
UInt64 num_rows = chunk.getNumRows();
|
||||
|
||||
/// Enrich with virtual columns.
|
||||
if (files_info->need_path_column)
|
||||
{
|
||||
auto column = DataTypeString().createColumnConst(num_rows, current_path);
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
chunk.addColumn(column->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
if (files_info->need_file_column)
|
||||
@ -411,10 +421,10 @@ public:
|
||||
auto file_name = current_path.substr(last_slash_pos + 1);
|
||||
|
||||
auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name));
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
chunk.addColumn(column->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
return Chunk(std::move(columns), num_rows);
|
||||
return chunk;
|
||||
}
|
||||
|
||||
/// Read only once for file descriptor.
|
||||
@ -422,8 +432,8 @@ public:
|
||||
finished_generate = true;
|
||||
|
||||
/// Close file prematurely if stream was ended.
|
||||
reader->readSuffix();
|
||||
reader.reset();
|
||||
pipeline.reset();
|
||||
read_buf.reset();
|
||||
}
|
||||
|
||||
@ -438,7 +448,8 @@ private:
|
||||
String current_path;
|
||||
Block sample_block;
|
||||
std::unique_ptr<ReadBuffer> read_buf;
|
||||
BlockInputStreamPtr reader;
|
||||
std::unique_ptr<QueryPipeline> pipeline;
|
||||
std::unique_ptr<PullingPipelineExecutor> reader;
|
||||
|
||||
ColumnsDescription columns_description;
|
||||
|
||||
|
@ -45,9 +45,9 @@ public:
|
||||
};
|
||||
|
||||
|
||||
void StorageInput::setInputStream(BlockInputStreamPtr input_stream_)
|
||||
void StorageInput::setPipe(Pipe pipe_)
|
||||
{
|
||||
input_stream = input_stream_;
|
||||
pipe = std::move(pipe_);
|
||||
}
|
||||
|
||||
|
||||
@ -70,10 +70,10 @@ Pipe StorageInput::read(
|
||||
return Pipe(std::make_shared<StorageInputSource>(query_context, metadata_snapshot->getSampleBlock()));
|
||||
}
|
||||
|
||||
if (!input_stream)
|
||||
if (pipe.empty())
|
||||
throw Exception("Input stream is not initialized, input() must be used only in INSERT SELECT query", ErrorCodes::INVALID_USAGE_OF_INPUT);
|
||||
|
||||
return Pipe(std::make_shared<SourceFromInputStream>(input_stream));
|
||||
return std::move(pipe);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Storages/IStorage.h>
|
||||
#include <common/shared_ptr_helper.h>
|
||||
#include <Processors/Pipe.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -15,7 +16,7 @@ public:
|
||||
String getName() const override { return "Input"; }
|
||||
|
||||
/// A table will read from this stream.
|
||||
void setInputStream(BlockInputStreamPtr input_stream_);
|
||||
void setPipe(Pipe pipe_);
|
||||
|
||||
Pipe read(
|
||||
const Names & column_names,
|
||||
@ -27,7 +28,7 @@ public:
|
||||
unsigned num_streams) override;
|
||||
|
||||
private:
|
||||
BlockInputStreamPtr input_stream;
|
||||
Pipe pipe;
|
||||
|
||||
protected:
|
||||
StorageInput(const StorageID & table_id, const ColumnsDescription & columns_);
|
||||
|
@ -19,9 +19,12 @@
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <DataStreams/narrowBlockInputStreams.h>
|
||||
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <aws/core/auth/AWSCredentials.h>
|
||||
@ -206,10 +209,18 @@ bool StorageS3Source::initialize()
|
||||
read_buf = wrapReadBufferWithCompressionMethod(
|
||||
std::make_unique<ReadBufferFromS3>(client, bucket, current_key, max_single_read_retries), chooseCompressionMethod(current_key, compression_hint));
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, getContext(), max_block_size);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
pipeline = std::make_unique<QueryPipeline>();
|
||||
pipeline->init(Pipe(input_format));
|
||||
|
||||
if (columns_desc.hasDefaults())
|
||||
reader = std::make_shared<AddingDefaultsBlockInputStream>(reader, columns_desc, getContext());
|
||||
{
|
||||
pipeline->addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext());
|
||||
});
|
||||
}
|
||||
|
||||
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
|
||||
initialized = false;
|
||||
return true;
|
||||
@ -225,31 +236,25 @@ Chunk StorageS3Source::generate()
|
||||
if (!reader)
|
||||
return {};
|
||||
|
||||
if (!initialized)
|
||||
Chunk chunk;
|
||||
if (reader->pull(chunk))
|
||||
{
|
||||
reader->readPrefix();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
if (auto block = reader->read())
|
||||
{
|
||||
auto columns = block.getColumns();
|
||||
UInt64 num_rows = block.rows();
|
||||
UInt64 num_rows = chunk.getNumRows();
|
||||
|
||||
if (with_path_column)
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
|
||||
chunk.addColumn(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
|
||||
if (with_file_column)
|
||||
{
|
||||
size_t last_slash_pos = file_path.find_last_of('/');
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path.substr(
|
||||
chunk.addColumn(DataTypeString().createColumnConst(num_rows, file_path.substr(
|
||||
last_slash_pos + 1))->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
return Chunk(std::move(columns), num_rows);
|
||||
return chunk;
|
||||
}
|
||||
|
||||
reader->readSuffix();
|
||||
reader.reset();
|
||||
pipeline.reset();
|
||||
read_buf.reset();
|
||||
|
||||
if (!initialize())
|
||||
|
@ -27,6 +27,7 @@ namespace Aws::S3
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class PullingPipelineExecutor;
|
||||
class StorageS3SequentialSource;
|
||||
class StorageS3Source : public SourceWithProgress, WithContext
|
||||
{
|
||||
@ -79,7 +80,8 @@ private:
|
||||
|
||||
|
||||
std::unique_ptr<ReadBuffer> read_buf;
|
||||
BlockInputStreamPtr reader;
|
||||
std::unique_ptr<QueryPipeline> pipeline;
|
||||
std::unique_ptr<PullingPipelineExecutor> reader;
|
||||
bool initialized = false;
|
||||
bool with_file_column = false;
|
||||
bool with_path_column = false;
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <DataStreams/narrowBlockInputStreams.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <Processors/Pipe.h>
|
||||
@ -126,8 +126,15 @@ Pipe StorageS3Cluster::read(
|
||||
/// For unknown reason global context is passed to IStorage::read() method
|
||||
/// So, task_identifier is passed as constructor argument. It is more obvious.
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
*connections.back(), queryToString(query_info.query), header, context,
|
||||
/*throttler=*/nullptr, scalars, Tables(), processed_stage, callback);
|
||||
*connections.back(),
|
||||
queryToString(query_info.query),
|
||||
header,
|
||||
context,
|
||||
/*throttler=*/nullptr,
|
||||
scalars,
|
||||
Tables(),
|
||||
processed_stage,
|
||||
callback);
|
||||
|
||||
pipes.emplace_back(std::make_shared<RemoteSource>(remote_query_executor, add_agg_info, false));
|
||||
}
|
||||
|
@ -16,11 +16,12 @@
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <algorithm>
|
||||
|
||||
@ -104,8 +105,15 @@ namespace
|
||||
compression_method);
|
||||
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
reader = std::make_shared<AddingDefaultsBlockInputStream>(reader, columns, context);
|
||||
pipeline = std::make_unique<QueryPipeline>();
|
||||
pipeline->init(Pipe(input_format));
|
||||
|
||||
pipeline->addSimpleTransform([&](const Block & cur_header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *input_format, context);
|
||||
});
|
||||
|
||||
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
@ -118,15 +126,11 @@ namespace
|
||||
if (!reader)
|
||||
return {};
|
||||
|
||||
if (!initialized)
|
||||
reader->readPrefix();
|
||||
Chunk chunk;
|
||||
if (reader->pull(chunk))
|
||||
return chunk;
|
||||
|
||||
initialized = true;
|
||||
|
||||
if (auto block = reader->read())
|
||||
return Chunk(block.getColumns(), block.rows());
|
||||
|
||||
reader->readSuffix();
|
||||
pipeline->reset();
|
||||
reader.reset();
|
||||
|
||||
return {};
|
||||
@ -135,8 +139,8 @@ namespace
|
||||
private:
|
||||
String name;
|
||||
std::unique_ptr<ReadBuffer> read_buf;
|
||||
BlockInputStreamPtr reader;
|
||||
bool initialized = false;
|
||||
std::unique_ptr<QueryPipeline> pipeline;
|
||||
std::unique_ptr<PullingPipelineExecutor> reader;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -85,8 +85,8 @@ ColumnsDescription ITableFunctionFileLike::getActualTableStructure(ContextPtr co
|
||||
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
|
||||
if (paths.empty())
|
||||
throw Exception("Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
auto read_stream = StorageDistributedDirectoryMonitor::createStreamFromFile(paths[0]);
|
||||
return ColumnsDescription{read_stream->getHeader().getNamesAndTypesList()};
|
||||
auto read_stream = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]);
|
||||
return ColumnsDescription{read_stream->getOutputs().front().getHeader().getNamesAndTypesList()};
|
||||
}
|
||||
return parseColumnsListFromString(structure, context);
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ MESSAGES_TO_RETRY = [
|
||||
"ConnectionPoolWithFailover: Connection failed at try",
|
||||
"DB::Exception: New table appeared in database being dropped or detached. Try again",
|
||||
"is already started to be removing by another replica right now",
|
||||
"Shutdown is called for table", # It happens in SYSTEM SYNC REPLICA query if session with ZooKeeper is being reinitialized.
|
||||
DISTRIBUTED_DDL_TIMEOUT_MSG # FIXME
|
||||
]
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
<openSSL>
|
||||
<client>
|
||||
<invalidCertificateHandler replace="replace">
|
||||
<name>AcceptCertificateHandler</name> <!-- For tests only-->
|
||||
<name>AcceptCertificateHandler</name> <!-- For tests only -->
|
||||
</invalidCertificateHandler>
|
||||
</client>
|
||||
</openSSL>
|
||||
|
@ -261,12 +261,31 @@ class ClickhouseIntegrationTestsRunner:
|
||||
|
||||
def _get_all_tests(self, repo_path):
|
||||
image_cmd = self._get_runner_image_cmd(repo_path)
|
||||
cmd = "cd {}/tests/integration && ./runner --tmpfs {} ' --setup-plan' | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' | grep -v 'SKIPPED' | sort -u > all_tests.txt".format(repo_path, image_cmd)
|
||||
out_file = "all_tests.txt"
|
||||
out_file_full = "all_tests_full.txt"
|
||||
cmd = "cd {repo_path}/tests/integration && " \
|
||||
"./runner --tmpfs {image_cmd} ' --setup-plan' " \
|
||||
"| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' " \
|
||||
"| grep -v 'SKIPPED' | sort -u > {out_file}".format(
|
||||
repo_path=repo_path, image_cmd=image_cmd, out_file=out_file, out_file_full=out_file_full)
|
||||
|
||||
logging.info("Getting all tests with cmd '%s'", cmd)
|
||||
subprocess.check_call(cmd, shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
||||
|
||||
all_tests_file_path = "{}/tests/integration/all_tests.txt".format(repo_path)
|
||||
all_tests_file_path = "{repo_path}/tests/integration/{out_file}".format(repo_path=repo_path, out_file=out_file)
|
||||
if not os.path.isfile(all_tests_file_path) or os.path.getsize(all_tests_file_path) == 0:
|
||||
all_tests_full_file_path = "{repo_path}/tests/integration/{out_file}".format(repo_path=repo_path, out_file=out_file_full)
|
||||
if os.path.isfile(all_tests_full_file_path):
|
||||
# log runner output
|
||||
logging.info("runner output:")
|
||||
with open(all_tests_full_file_path, 'r') as all_tests_full_file:
|
||||
for line in all_tests_full_file:
|
||||
line = line.rstrip()
|
||||
if line:
|
||||
logging.info("runner output: %s", line)
|
||||
else:
|
||||
logging.info("runner output '%s' is empty", all_tests_full_file_path)
|
||||
|
||||
raise Exception("There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format(all_tests_file_path))
|
||||
|
||||
all_tests = []
|
||||
@ -376,7 +395,7 @@ class ClickhouseIntegrationTestsRunner:
|
||||
|
||||
image_cmd = self._get_runner_image_cmd(repo_path)
|
||||
test_group_str = test_group.replace('/', '_').replace('.', '_')
|
||||
|
||||
|
||||
log_paths = []
|
||||
test_data_dirs = {}
|
||||
|
||||
|
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<max_concurrent_queries>10000</max_concurrent_queries>
|
||||
</yandex>
|
36
tests/integration/test_async_drain_connection/test.py
Normal file
36
tests/integration/test_async_drain_connection/test.py
Normal file
@ -0,0 +1,36 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from multiprocessing.dummy import Pool
|
||||
import pytest
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
node = cluster.add_instance("node", main_configs=["configs/config.xml"])
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
node.query(
|
||||
'create table t (number UInt64) engine = Distributed(test_cluster_two_shards, system, numbers);'
|
||||
)
|
||||
yield cluster
|
||||
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_filled_async_drain_connection_pool(started_cluster):
|
||||
busy_pool = Pool(10)
|
||||
|
||||
def execute_query(i):
|
||||
for _ in range(100):
|
||||
node.query('select * from t where number = 0 limit 2;',
|
||||
settings={
|
||||
"sleep_in_receive_cancel_ms": 10000000,
|
||||
"max_execution_time": 5
|
||||
})
|
||||
|
||||
p = busy_pool.map(execute_query, range(10))
|
@ -23,10 +23,15 @@ join_use_nulls = 1
|
||||
-
|
||||
\N \N
|
||||
-
|
||||
1 1 \N \N
|
||||
2 2 \N \N
|
||||
-
|
||||
1 1 1 1
|
||||
2 2 \N \N
|
||||
-
|
||||
1 1 1 1
|
||||
-
|
||||
2 2 \N \N
|
||||
join_use_nulls = 0
|
||||
1 1
|
||||
2 2
|
||||
@ -49,7 +54,12 @@ join_use_nulls = 0
|
||||
-
|
||||
-
|
||||
-
|
||||
1 1 0 0
|
||||
2 2 0 0
|
||||
-
|
||||
1 1 1 1
|
||||
2 2 0 0
|
||||
-
|
||||
1 1 1 1
|
||||
-
|
||||
2 2 0 0
|
||||
|
@ -4,9 +4,8 @@ drop table if exists s;
|
||||
create table t(a Int64, b Int64) engine = Memory;
|
||||
create table s(a Int64, b Int64) engine = Memory;
|
||||
|
||||
insert into t values(1,1);
|
||||
insert into t values(2,2);
|
||||
insert into s values(1,1);
|
||||
insert into t values (1,1), (2,2);
|
||||
insert into s values (1,1);
|
||||
|
||||
select 'join_use_nulls = 1';
|
||||
set join_use_nulls = 1;
|
||||
@ -30,11 +29,13 @@ select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
|
||||
select '-';
|
||||
select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 }
|
||||
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (s.a=t.a) order by t.a;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 }
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(1) and s.a=t.a) where s.b=1;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where t.b=2;
|
||||
|
||||
select 'join_use_nulls = 0';
|
||||
set join_use_nulls = 0;
|
||||
@ -58,11 +59,13 @@ select '-';
|
||||
select '-';
|
||||
-- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 }
|
||||
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (s.a=t.a) order by t.a;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 }
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(1) and s.a=t.a) where s.b=1;
|
||||
select '-';
|
||||
select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where t.b=2;
|
||||
|
||||
drop table t;
|
||||
drop table s;
|
||||
|
@ -89,7 +89,7 @@ idx10 ['This','is','a','test']
|
||||
23.00
|
||||
24.00
|
||||
=== Try load data from datapage_v2.snappy.parquet
|
||||
Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin. (CANNOT_READ_ALL_DATA)
|
||||
Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin. (CANNOT_READ_ALL_DATA)
|
||||
|
||||
=== Try load data from datatype-date32.parquet
|
||||
1925-01-01
|
||||
|
@ -11,7 +11,7 @@
|
||||
10
|
||||
11
|
||||
12
|
||||
13 fail: join predicates
|
||||
13
|
||||
14
|
||||
0.000000
|
||||
15 fail: correlated subquery
|
||||
|
@ -476,7 +476,7 @@ group by
|
||||
order by
|
||||
l_shipmode;
|
||||
|
||||
select 13, 'fail: join predicates'; -- TODO: Invalid expression for JOIN ON
|
||||
select 13;
|
||||
select
|
||||
c_count,
|
||||
count(*) as custdist
|
||||
@ -484,7 +484,7 @@ from
|
||||
(
|
||||
select
|
||||
c_custkey,
|
||||
count(o_orderkey)
|
||||
count(o_orderkey) as c_count
|
||||
from
|
||||
customer left outer join orders on
|
||||
c_custkey = o_custkey
|
||||
@ -496,7 +496,7 @@ group by
|
||||
c_count
|
||||
order by
|
||||
custdist desc,
|
||||
c_count desc; -- { serverError 403 }
|
||||
c_count desc;
|
||||
|
||||
select 14;
|
||||
select
|
||||
|
@ -4,8 +4,8 @@
|
||||
4 4
|
||||
mt 0 0_1_1_0 2
|
||||
rmt 0 0_0_0_0 2
|
||||
1 1
|
||||
2 2
|
||||
1 s1
|
||||
2 s2
|
||||
mt 0 0_1_1_0 2
|
||||
rmt 0 0_3_3_0 2
|
||||
0000000000 UPDATE s = concat(\'s\', toString(n)) WHERE 1 [] 0 1
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user