Merge branch 'master' into romanzhukov-DOCSUP-11551-adding_new_third-party

romanzhukov 2021-07-24 20:08:24 +03:00
commit d7881aa588
137 changed files with 2181 additions and 1361 deletions

View File

@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help you get your questions answered.
* You can also [fill in this form](https://clickhouse.tech/#meet) to meet the Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Meetup by ByteDance (online)](https://www.meetup.com/ByteDanceDev-group/events/279543467/) on 23 July 2021.

View File

@ -0,0 +1,24 @@
#pragma once
#include <algorithm>
#include <vector>
/// Removes duplicates from a container without changing the order of its elements.
/// Keeps the last occurrence of each element.
/// Should NOT be used for containers with a lot of elements because it has O(N^2) complexity.
template <typename T>
void removeDuplicatesKeepLast(std::vector<T> & vec)
{
auto begin = vec.begin();
auto end = vec.end();
auto new_begin = end;
for (auto current = end; current != begin;)
{
--current;
if (std::find(new_begin, end, *current) == end)
{
--new_begin;
if (new_begin != current)
*new_begin = *current;
}
}
vec.erase(begin, new_begin);
}
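A quick sanity check of the semantics (the *last* occurrence of each value survives, and the order is otherwise preserved). This is a hypothetical usage sketch, assuming the header above is available as `common/removeDuplicates.h`, the include path used elsewhere in this commit:

``` cpp
#include <iostream>
#include <vector>

#include <common/removeDuplicates.h>

int main()
{
    /// 1 and 2 occur twice; only their last occurrences are kept.
    std::vector<int> v{1, 2, 3, 1, 2, 4};
    removeDuplicatesKeepLast(v);
    for (int x : v)
        std::cout << x << ' ';   /// prints: 3 1 2 4
    std::cout << '\n';
}
```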

View File

@ -164,6 +164,10 @@ fi
# if no args are passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
# Watchdog is launched by default, but does not send SIGINT to the main process,
# so the container cannot be stopped with Ctrl+C
CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
export CLICKHOUSE_WATCHDOG_ENABLE
exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi

View File

@ -194,6 +194,10 @@ continue
jobs
pstree -aspgT
server_exit_code=0
wait $server_pid || server_exit_code=$?
echo "Server exit code is $server_exit_code"
# Make files with the status and description that we'll show for this check on GitHub.
task_exit_code=$fuzzer_exit_code
if [ "$server_died" == 1 ]

View File

@ -1196,7 +1196,7 @@ create table changes engine File(TSV, 'metrics/changes.tsv') as
if(left > right, left / right, right / left) times_diff
from metrics
group by metric
having abs(diff) > 0.05 and isFinite(diff)
having abs(diff) > 0.05 and isFinite(diff) and isFinite(times_diff)
)
order by diff desc
;

View File

@ -5,6 +5,8 @@ toc_title: MaterializeMySQL
# MaterializeMySQL {#materialize-mysql}
**This is an experimental feature that should not be used in production.**
Creates a ClickHouse database with all the tables existing in MySQL, and all the data in those tables.
The ClickHouse server works as a MySQL replica. It reads the binlog and performs DDL and DML queries.

View File

@ -30,21 +30,25 @@ Other common parameters are inherited from clickhouse-server config (`listen_hos
Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section:
- `operation_timeout_ms` — timeout for a single client operation
- `session_timeout_ms` — timeout for client session
- `dead_session_check_period_ms` — how often clickhouse-keeper checks dead sessions and removes them
- `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers
- `election_timeout_lower_bound_ms` — if a follower didn't receive heartbeats from the leader within this interval, it can initiate leader election
- `election_timeout_upper_bound_ms` — if a follower didn't receive heartbeats from the leader within this interval, it must initiate leader election
- `rotate_log_storage_interval` — how many logs to store in a single file
- `reserved_log_items` — how many coordination logs to store before compaction
- `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of logs)
- `snapshots_to_keep` — how many snapshots to keep
- `stale_log_gap` — the threshold at which the leader considers a follower stale and sends it a snapshot instead of logs
- `force_sync` — call `fsync` on each write to the coordination log
- `raft_logs_level` — text logging level for coordination (trace, debug, and so on)
- `shutdown_timeout` — wait to finish internal connections and shut down
- `startup_timeout` — if the server doesn't connect to other quorum participants within the specified timeout, it will terminate
- `operation_timeout_ms` — timeout for a single client operation (default: 10000)
- `session_timeout_ms` — timeout for client session (default: 30000)
- `dead_session_check_period_ms` — how often clickhouse-keeper checks dead sessions and removes them (default: 500)
- `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers (default: 500)
- `election_timeout_lower_bound_ms` — if a follower didn't receive heartbeats from the leader within this interval, it can initiate leader election (default: 1000)
- `election_timeout_upper_bound_ms` — if a follower didn't receive heartbeats from the leader within this interval, it must initiate leader election (default: 2000)
- `rotate_log_storage_interval` — how many log records to store in a single file (default: 100000)
- `reserved_log_items` — how many coordination log records to store before compaction (default: 100000)
- `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of records in logs) (default: 100000)
- `snapshots_to_keep` — how many snapshots to keep (default: 3)
- `stale_log_gap` — the threshold at which the leader considers a follower stale and sends it a snapshot instead of logs (default: 10000)
- `fresh_log_gap` — when a node becomes fresh (default: 200)
- `max_requests_batch_size` — maximum batch size, in number of requests, before it is sent to RAFT (default: 100)
- `force_sync` — call `fsync` on each write to the coordination log (default: true)
- `quorum_reads` — execute read requests as writes through the whole RAFT consensus with similar speed (default: false)
- `raft_logs_level` — text logging level for coordination (trace, debug, and so on) (default: system default)
- `auto_forwarding` — allow forwarding write requests from followers to the leader (default: true)
- `shutdown_timeout` — wait to finish internal connections and shut down (ms) (default: 5000)
- `startup_timeout` — if the server doesn't connect to other quorum participants within the specified timeout, it will terminate (ms) (default: 30000)
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains the description of servers. The only parameter for the whole quorum is `secure`, which enables encrypted connections between quorum participants. The main parameters for each `<server>` are:

View File

@ -716,7 +716,7 @@ Keys for server/client settings:
- extendedVerification Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips  Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
- privateKeyPassphraseHandler Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
- invalidCertificateHandler Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .

View File

@ -13,7 +13,7 @@ To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md)
## Granting Privilege Syntax {#grant-privigele-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION]
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
```
- `privilege` — Type of privilege.
@ -21,17 +21,19 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges of the same or a smaller scope than they have.
The `WITH REPLACE OPTION` clause replaces old privileges with new privileges for the `user` or `role`; if it is not specified, new privileges are appended to the old ones.
## Assigning Role Syntax {#assign-role-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION]
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION]
```
- `role` — ClickHouse user role.
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants the [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, new roles are appended to the old ones.
## Usage {#grant-usage}

View File

@ -464,7 +464,7 @@ SSL support is provided by the `libpoco` library
- extendedVerification Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
- privateKeyPassphraseHandler Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
- invalidCertificateHandler Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .

View File

@ -15,7 +15,7 @@ toc_title: GRANT
## Granting Privilege Syntax {#grant-privigele-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION]
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
```
- `privilege` — Type of privilege.
@ -23,17 +23,19 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges of the same or a smaller scope than they have.
The `WITH REPLACE OPTION` clause replaces old privileges with new privileges for the `user` or `role`; if it is not specified, new privileges are appended to the old ones.
## Assigning Role Syntax {#assign-role-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION]
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION]
```
- `role` — ClickHouse user role.
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants the [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, new roles are appended to the old ones.
## Usage {#grant-usage}

View File

@ -13,7 +13,7 @@ toc_title: GRANT
## Granting Privilege Syntax {#grant-privigele-syntax}
```sql
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION]
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
```
- `privilege` — Type of privilege.
@ -21,18 +21,20 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause allows the user or role to execute the `GRANT` query. A user can grant only those privileges that they have, of the same or a smaller scope.
The `WITH REPLACE OPTION` clause replaces all old privileges with new privileges for the `user` or `role`; if it is not specified, new privileges are appended to the old ones.
## Assigning Role Syntax {#assign-role-syntax}
```sql
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION]
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION]
```
- `role` — ClickHouse user role.
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants the [ADMIN OPTION](#admin-option-privilege) privilege to a user or role.
The `WITH REPLACE OPTION` clause replaces all old roles with new roles for the `user` or `role`; if it is not specified, new roles are appended to the old ones.
## Usage {#grant-usage}
@ -481,4 +483,3 @@ GRANT INSERT(x,y) ON db.table TO john
### ADMIN OPTION {#admin-option-privilege}
The `ADMIN OPTION` privilege allows a user to assign their roles to another user.

View File

@ -462,7 +462,7 @@ SSL client/server configuration.
- extendedVerification Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips  Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
- privateKeyPassphraseHandler Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
- invalidCertificateHandler Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .

View File

@ -12,7 +12,7 @@ toc_title: GRANT
## Granting Privilege Syntax {#grant-privigele-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION]
GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
```
- `privilege` — Type of privilege.
@ -20,17 +20,19 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — User account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges within the scope of their own privileges.
The `WITH REPLACE OPTION` clause replaces the old privileges of `user` or `role` with the new privileges in the current SQL statement; if it is not specified, privileges are appended.
## Assigning Role Syntax {#assign-role-syntax}
``` sql
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION]
GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION]
```
- `role` — Role.
- `user` — User account.
The `WITH ADMIN OPTION` clause grants `user` or `role` the [ADMIN OPTION](#admin-option-privilege) privilege.
The `WITH REPLACE OPTION` clause replaces the old roles of `user` or `role` with the new roles in the current SQL statement; if it is not specified, roles are appended.
## Usage {#grant-usage}

View File

@ -271,7 +271,8 @@ private:
if (max_time > 0 && total_watch.elapsedSeconds() >= max_time)
{
std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n";
std::cout << "Stopping launch of queries."
<< " Requested time limit " << max_time << " seconds is exhausted.\n";
return false;
}
@ -313,6 +314,7 @@ private:
}
catch (...)
{
shutdown = true;
pool.wait();
throw;
}
@ -368,8 +370,7 @@ private:
{
extracted = queue.tryPop(query, 100);
if (shutdown
|| (max_iterations && queries_executed == max_iterations))
if (shutdown || (max_iterations && queries_executed == max_iterations))
{
return;
}
@ -382,8 +383,9 @@ private:
}
catch (...)
{
std::cerr << "An error occurred while processing the query '"
<< query << "'.\n";
std::lock_guard lock(mutex);
std::cerr << "An error occurred while processing the query " << "'" << query << "'"
<< ": " << getCurrentExceptionMessage(false) << std::endl;
if (!continue_on_errors)
{
shutdown = true;

View File

@ -26,6 +26,9 @@
#include <boost/algorithm/string/replace.hpp>
#include <Poco/String.h>
#include <Poco/Util/Application.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Columns/ColumnString.h>
#include <common/find_symbols.h>
#include <common/LineReader.h>
@ -55,8 +58,7 @@
#include <IO/Operators.h>
#include <IO/UseSSL.h>
#include <IO/WriteBufferFromOStream.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <DataStreams/InternalTextLogsRowOutputStream.h>
#include <DataStreams/NullBlockOutputStream.h>
#include <Parsers/ASTCreateQuery.h>
@ -80,6 +82,7 @@
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
#include <Common/Config/configReadClient.h>
#include <Storages/ColumnsDescription.h>
#include <common/argsToConfig.h>
@ -422,6 +425,7 @@ private:
{TokenType::Semicolon, Replxx::Color::INTENSE},
{TokenType::Dot, Replxx::Color::INTENSE},
{TokenType::Asterisk, Replxx::Color::INTENSE},
{TokenType::HereDoc, Replxx::Color::CYAN},
{TokenType::Plus, Replxx::Color::INTENSE},
{TokenType::Minus, Replxx::Color::INTENSE},
{TokenType::Slash, Replxx::Color::INTENSE},
@ -447,8 +451,7 @@ private:
{TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED},
{TokenType::ErrorSinglePipeMark, Replxx::Color::RED},
{TokenType::ErrorWrongNumber, Replxx::Color::RED},
{ TokenType::ErrorMaxQuerySizeExceeded,
Replxx::Color::RED }};
{TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }};
const Replxx::Color unknown_token_color = Replxx::Color::RED;
@ -1925,19 +1928,24 @@ private:
current_format = insert->format;
}
BlockInputStreamPtr block_input = context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
auto source = FormatFactory::instance().getInput(current_format, buf, sample, context, insert_format_max_block_size);
Pipe pipe(source);
if (columns_description.hasDefaults())
block_input = std::make_shared<AddingDefaultsBlockInputStream>(block_input, columns_description, context);
BlockInputStreamPtr async_block_input = std::make_shared<AsynchronousBlockInputStream>(block_input);
async_block_input->readPrefix();
while (true)
{
Block block = async_block_input->read();
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, context);
});
}
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
PullingAsyncPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
{
/// Check if the server sent a Log packet
receiveLogs();
@ -1949,18 +1957,18 @@ private:
* We're exiting with error, so it makes sense to kill the
* input stream without waiting for it to complete.
*/
async_block_input->cancel(true);
executor.cancel();
return;
}
connection->sendData(block);
processed_rows += block.rows();
if (!block)
break;
if (block)
{
connection->sendData(block);
processed_rows += block.rows();
}
}
async_block_input->readSuffix();
connection->sendData({});
}
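The change above replaces the old `IBlockInputStream` loop (`readPrefix()` / `read()` / `readSuffix()`) with a `Pipe` plus a pulling executor: transforms are attached to the pipe, and the caller drains blocks with `executor.pull(block)`. A minimal standalone sketch of that control flow with toy stand-in types (none of these names are the real ClickHouse API):

``` cpp
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

using Chunk = std::vector<int>;   /// toy stand-in for a Block

struct ToyPipe
{
    std::function<std::optional<Chunk>()> pull;   /// source of chunks

    /// Analogue of Pipe::addSimpleTransform: wrap the source in a per-chunk transform.
    void addSimpleTransform(std::function<Chunk(Chunk)> transform)
    {
        auto prev = pull;
        pull = [prev, transform]() -> std::optional<Chunk>
        {
            if (auto chunk = prev())
                return transform(*chunk);
            return std::nullopt;
        };
    }
};

int main()
{
    int produced = 0;
    ToyPipe pipe{[&]() -> std::optional<Chunk>
    {
        if (produced < 3)
        {
            ++produced;
            return Chunk{produced, produced * 10};
        }
        return std::nullopt;   /// source exhausted
    }};

    /// Analogue of AddingDefaultsTransform: rewrite chunks in flight.
    pipe.addSimpleTransform([](Chunk c) { for (int & x : c) x += 1; return c; });

    /// Analogue of the PullingAsyncPipelineExecutor loop: pull until empty.
    while (auto chunk = pipe.pull())
        for (int x : *chunk)
            std::cout << x << ' ';   /// prints: 2 11 3 21 4 31
    std::cout << '\n';
}
```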

View File

@ -15,8 +15,8 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
#include <Processors/Pipe.h>
#include <Processors/LimitTransform.h>
#include <Common/SipHash.h>
#include <Common/UTF8Helpers.h>
#include <Common/StringUtils/StringUtils.h>
@ -24,6 +24,10 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Core/Block.h>
#include <common/StringRef.h>
#include <common/DateLUT.h>
@ -1156,17 +1160,20 @@ try
if (!silent)
std::cerr << "Training models\n";
BlockInputStreamPtr input = context->getInputFormat(input_format, file_in, header, max_block_size);
Pipe pipe(FormatFactory::instance().getInput(input_format, file_in, header, context, max_block_size));
input->readPrefix();
while (Block block = input->read())
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
{
obfuscator.train(block.getColumns());
source_rows += block.rows();
if (!silent)
std::cerr << "Processed " << source_rows << " rows\n";
}
input->readSuffix();
}
obfuscator.finalize();
@ -1183,15 +1190,26 @@ try
file_in.seek(0, SEEK_SET);
BlockInputStreamPtr input = context->getInputFormat(input_format, file_in, header, max_block_size);
BlockOutputStreamPtr output = context->getOutputStreamParallelIfPossible(output_format, file_out, header);
Pipe pipe(FormatFactory::instance().getInput(input_format, file_in, header, context, max_block_size));
if (processed_rows + source_rows > limit)
input = std::make_shared<LimitBlockInputStream>(input, limit - processed_rows, 0);
{
pipe.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<LimitTransform>(cur_header, limit - processed_rows, 0);
});
}
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
BlockOutputStreamPtr output = context->getOutputStreamParallelIfPossible(output_format, file_out, header);
PullingPipelineExecutor executor(pipeline);
input->readPrefix();
output->writePrefix();
while (Block block = input->read())
Block block;
while (executor.pull(block))
{
Columns columns = obfuscator.generate(block.getColumns());
output->write(header.cloneWithColumns(columns));
@ -1200,7 +1218,6 @@ try
std::cerr << "Processed " << processed_rows << " rows\n";
}
output->writeSuffix();
input->readSuffix();
obfuscator.updateSeed();
}

View File

@ -489,11 +489,12 @@ std::shared_ptr<const EnabledSettings> AccessControlManager::getEnabledSettings(
return settings_profiles_cache->getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles);
}
std::shared_ptr<const SettingsChanges> AccessControlManager::getProfileSettings(const String & profile_name) const
std::shared_ptr<const SettingsProfilesInfo> AccessControlManager::getSettingsProfileInfo(const UUID & profile_id)
{
return settings_profiles_cache->getProfileSettings(profile_name);
return settings_profiles_cache->getSettingsProfileInfo(profile_id);
}
const ExternalAuthenticators & AccessControlManager::getExternalAuthenticators() const
{
return *external_authenticators;

View File

@ -32,8 +32,7 @@ class RowPolicyCache;
class EnabledQuota;
class QuotaCache;
struct QuotaUsage;
struct SettingsProfile;
using SettingsProfilePtr = std::shared_ptr<const SettingsProfile>;
struct SettingsProfilesInfo;
class EnabledSettings;
class SettingsProfilesCache;
class SettingsProfileElements;
@ -145,7 +144,7 @@ public:
const boost::container::flat_set<UUID> & enabled_roles,
const SettingsProfileElements & settings_from_enabled_roles) const;
std::shared_ptr<const SettingsChanges> getProfileSettings(const String & profile_name) const;
std::shared_ptr<const SettingsProfilesInfo> getSettingsProfileInfo(const UUID & profile_id);
const ExternalAuthenticators & getExternalAuthenticators() const;

View File

@ -7,6 +7,7 @@
#include <Access/User.h>
#include <Access/EnabledRolesInfo.h>
#include <Access/EnabledSettings.h>
#include <Access/SettingsProfilesInfo.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
@ -307,23 +308,25 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
}
std::shared_ptr<const Settings> ContextAccess::getDefaultSettings() const
SettingsChanges ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
if (enabled_settings)
return enabled_settings->getSettings();
static const auto everything_by_default = std::make_shared<Settings>();
return everything_by_default;
{
if (auto info = enabled_settings->getInfo())
return info->settings;
}
return {};
}
std::shared_ptr<const SettingsConstraints> ContextAccess::getSettingsConstraints() const
std::shared_ptr<const SettingsProfilesInfo> ContextAccess::getDefaultProfileInfo() const
{
std::lock_guard lock{mutex};
if (enabled_settings)
return enabled_settings->getConstraints();
static const auto no_constraints = std::make_shared<SettingsConstraints>();
return no_constraints;
return enabled_settings->getInfo();
static const auto everything_by_default = std::make_shared<SettingsProfilesInfo>(*manager);
return everything_by_default;
}

View File

@ -23,7 +23,8 @@ class EnabledQuota;
class EnabledSettings;
struct QuotaUsage;
struct Settings;
class SettingsConstraints;
struct SettingsProfilesInfo;
class SettingsChanges;
class AccessControlManager;
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
@ -84,11 +85,9 @@ public:
std::shared_ptr<const EnabledQuota> getQuota() const;
std::optional<QuotaUsage> getQuotaUsage() const;
/// Returns the default settings, i.e. the settings to apply on user's login.
std::shared_ptr<const Settings> getDefaultSettings() const;
/// Returns the settings' constraints.
std::shared_ptr<const SettingsConstraints> getSettingsConstraints() const;
/// Returns the default settings, i.e. the settings which should be applied on user's login.
SettingsChanges getDefaultSettings() const;
std::shared_ptr<const SettingsProfilesInfo> getDefaultProfileInfo() const;
/// Returns the current access rights.
std::shared_ptr<const AccessRights> getAccessRights() const;

View File

@ -11,27 +11,16 @@ EnabledSettings::EnabledSettings(const Params & params_) : params(params_)
EnabledSettings::~EnabledSettings() = default;
std::shared_ptr<const Settings> EnabledSettings::getSettings() const
std::shared_ptr<const SettingsProfilesInfo> EnabledSettings::getInfo() const
{
std::lock_guard lock{mutex};
return settings;
return info;
}
std::shared_ptr<const SettingsConstraints> EnabledSettings::getConstraints() const
void EnabledSettings::setInfo(const std::shared_ptr<const SettingsProfilesInfo> & info_)
{
std::lock_guard lock{mutex};
return constraints;
}
void EnabledSettings::setSettingsAndConstraints(
const std::shared_ptr<const Settings> & settings_, const std::shared_ptr<const SettingsConstraints> & constraints_)
{
std::lock_guard lock{mutex};
settings = settings_;
constraints = constraints_;
info = info_;
}
}

View File

@ -1,15 +1,15 @@
#pragma once
#include <common/types.h>
#include <Core/UUID.h>
#include <Access/SettingsConstraints.h>
#include <Access/SettingsProfileElement.h>
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <mutex>
namespace DB
{
struct SettingsProfilesInfo;
/// Watches settings profiles for a specific user and roles.
class EnabledSettings
{
@ -30,27 +30,19 @@ public:
friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); }
};
~EnabledSettings();
/// Returns the default settings that come from the settings profiles defined for the user
/// and the roles passed in the constructor.
std::shared_ptr<const Settings> getSettings() const;
std::shared_ptr<const SettingsProfilesInfo> getInfo() const;
/// Returns the constraints that come from the settings profiles defined for the user
/// and the roles passed in the constructor.
std::shared_ptr<const SettingsConstraints> getConstraints() const;
~EnabledSettings();
private:
friend class SettingsProfilesCache;
EnabledSettings(const Params & params_);
void setSettingsAndConstraints(
const std::shared_ptr<const Settings> & settings_, const std::shared_ptr<const SettingsConstraints> & constraints_);
void setInfo(const std::shared_ptr<const SettingsProfilesInfo> & info_);
const Params params;
SettingsProfileElements settings_from_enabled;
std::shared_ptr<const Settings> settings;
std::shared_ptr<const SettingsConstraints> constraints;
std::shared_ptr<const SettingsProfilesInfo> info;
mutable std::mutex mutex;
};
}

View File

@ -3,7 +3,6 @@
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
namespace DB
{
void GrantedRoles::grant(const UUID & role_)
@ -80,7 +79,7 @@ std::vector<UUID> GrantedRoles::findGranted(const boost::container::flat_set<UUI
{
std::vector<UUID> res;
res.reserve(ids.size());
boost::range::set_difference(ids, roles, std::back_inserter(res));
boost::range::set_intersection(ids, roles, std::back_inserter(res));
return res;
}
@ -111,7 +110,7 @@ std::vector<UUID> GrantedRoles::findGrantedWithAdminOption(const boost::containe
{
std::vector<UUID> res;
res.reserve(ids.size());
boost::range::set_difference(ids, roles_with_admin_option, std::back_inserter(res));
boost::range::set_intersection(ids, roles_with_admin_option, std::back_inserter(res));
return res;
}
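The fix above replaces `set_difference` with `set_intersection`: `findGranted` must return the subset of `ids` that *are* granted, not the ones that are missing. A minimal sketch of the intended semantics using the standard library (plain ints stand in for the role UUIDs; both input ranges must be sorted, as they are for the flat sets in the real code):

``` cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>
#include <vector>

int main()
{
    std::set<int> granted_roles{1, 2, 5};   /// stand-in for GrantedRoles::roles
    std::vector<int> ids{2, 3, 5, 7};       /// the roles being queried (sorted)

    std::vector<int> found;
    found.reserve(ids.size());
    /// Keep only the ids that are actually granted: intersection, not difference.
    std::set_intersection(ids.begin(), ids.end(),
                          granted_roles.begin(), granted_roles.end(),
                          std::back_inserter(found));

    for (int id : found)
        std::cout << id << ' ';   /// prints: 2 5
    std::cout << '\n';
}
```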

View File

@ -197,6 +197,16 @@ String IAccessStorage::readName(const UUID & id) const
}
Strings IAccessStorage::readNames(const std::vector<UUID> & ids) const
{
Strings res;
res.reserve(ids.size());
for (const auto & id : ids)
res.emplace_back(readName(id));
return res;
}
std::optional<String> IAccessStorage::tryReadName(const UUID & id) const
{
String name;
@ -207,6 +217,19 @@ std::optional<String> IAccessStorage::tryReadName(const UUID & id) const
}
Strings IAccessStorage::tryReadNames(const std::vector<UUID> & ids) const
{
Strings res;
res.reserve(ids.size());
for (const auto & id : ids)
{
if (auto name = tryReadName(id))
res.emplace_back(std::move(name).value());
}
return res;
}
UUID IAccessStorage::insert(const AccessEntityPtr & entity)
{
return insertImpl(entity, false);

View File

@ -84,7 +84,9 @@ public:
/// Reads only name of an entity.
String readName(const UUID & id) const;
Strings readNames(const std::vector<UUID> & ids) const;
std::optional<String> tryReadName(const UUID & id) const;
Strings tryReadNames(const std::vector<UUID> & ids) const;
/// Returns true if a specified entity can be inserted into this storage.
/// This function doesn't check whether there are no entities with such name in the storage.

View File

@ -18,8 +18,6 @@ namespace ErrorCodes
}
SettingsConstraints::SettingsConstraints() = default;
SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_)
{
}
@ -201,13 +199,10 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
}
};
if (manager)
{
if (reaction == THROW_ON_VIOLATION)
manager->checkSettingNameIsAllowed(setting_name);
else if (!manager->isSettingNameAllowed(setting_name))
return false;
}
if (reaction == THROW_ON_VIOLATION)
manager->checkSettingNameIsAllowed(setting_name);
else if (!manager->isSettingNameAllowed(setting_name))
return false;
Field current_value, new_value;
if (current_settings.tryGet(setting_name, current_value))

View File

@ -51,7 +51,6 @@ class AccessControlManager;
class SettingsConstraints
{
public:
SettingsConstraints();
SettingsConstraints(const AccessControlManager & manager_);
SettingsConstraints(const SettingsConstraints & src);
SettingsConstraints & operator =(const SettingsConstraints & src);

View File

@ -0,0 +1,21 @@
#pragma once
#include <Access/SettingsConstraints.h>
#include <Core/UUID.h>
#include <vector>
namespace DB
{
/// Information about currently applied constraints and profiles.
struct SettingsConstraintsAndProfileIDs
{
SettingsConstraints constraints;
std::vector<UUID> current_profiles;
std::vector<UUID> enabled_profiles;
SettingsConstraintsAndProfileIDs(const AccessControlManager & manager_) : constraints(manager_) {}
};
}

View File

@ -7,6 +7,7 @@
#include <Common/SettingsChanges.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <common/removeDuplicates.h>
namespace DB
@ -172,4 +173,21 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
return res;
}
std::vector<UUID> SettingsProfileElements::toProfileIDs() const
{
std::vector<UUID> res;
for (const auto & elem : *this)
{
if (elem.parent_profile)
res.push_back(*elem.parent_profile);
}
/// If some profile occurs multiple times (with some other settings in between),
/// the latest occurrence overrides all the previous ones.
removeDuplicatesKeepLast(res);
return res;
}
}

View File

@ -62,6 +62,7 @@ public:
Settings toSettings() const;
SettingsChanges toSettingsChanges() const;
SettingsConstraints toSettingsConstraints(const AccessControlManager & manager) const;
std::vector<UUID> toProfileIDs() const;
};
}

View File

@ -1,11 +1,8 @@
#include <Access/SettingsProfilesCache.h>
#include <Access/AccessControlManager.h>
#include <Access/SettingsProfile.h>
#include <Core/Settings.h>
#include <Common/SettingsChanges.h>
#include <Access/SettingsProfilesInfo.h>
#include <Common/quoteString.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
namespace DB
@ -15,7 +12,6 @@ namespace ErrorCodes
extern const int THERE_IS_NO_PROFILE;
}
SettingsProfilesCache::SettingsProfilesCache(const AccessControlManager & manager_)
: manager(manager_) {}
@ -67,7 +63,7 @@ void SettingsProfilesCache::profileAddedOrChanged(const UUID & profile_id, const
profiles_by_name.erase(old_profile->getName());
profiles_by_name[new_profile->getName()] = profile_id;
}
settings_for_profiles.clear();
profile_infos_cache.clear();
mergeSettingsAndConstraints();
}
@ -80,7 +76,7 @@ void SettingsProfilesCache::profileRemoved(const UUID & profile_id)
return;
profiles_by_name.erase(it->second->getName());
all_profiles.erase(it);
settings_for_profiles.clear();
profile_infos_cache.clear();
mergeSettingsAndConstraints();
}
@ -142,49 +138,52 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
merged_settings.merge(enabled.params.settings_from_enabled_roles);
merged_settings.merge(enabled.params.settings_from_user);
substituteProfiles(merged_settings);
auto info = std::make_shared<SettingsProfilesInfo>(manager);
info->profiles = enabled.params.settings_from_user.toProfileIDs();
substituteProfiles(merged_settings, info->profiles_with_implicit, info->names_of_profiles);
info->settings = merged_settings.toSettingsChanges();
info->constraints = merged_settings.toSettingsConstraints(manager);
auto settings = merged_settings.toSettings();
auto constraints = merged_settings.toSettingsConstraints(manager);
enabled.setSettingsAndConstraints(
std::make_shared<Settings>(std::move(settings)), std::make_shared<SettingsConstraints>(std::move(constraints)));
enabled.setInfo(std::move(info));
}
void SettingsProfilesCache::substituteProfiles(SettingsProfileElements & elements) const
void SettingsProfilesCache::substituteProfiles(
SettingsProfileElements & elements,
std::vector<UUID> & substituted_profiles,
std::unordered_map<UUID, String> & names_of_substituted_profiles) const
{
boost::container::flat_set<UUID> already_substituted;
for (size_t i = 0; i != elements.size();)
/// We should substitute profiles in reverse order because the same profile can occur
/// in `elements` multiple times (with some other settings in between) and in this case
/// the last occurrence should override all the previous ones.
boost::container::flat_set<UUID> substituted_profiles_set;
size_t i = elements.size();
while (i != 0)
{
auto & element = elements[i];
auto & element = elements[--i];
if (!element.parent_profile)
{
++i;
continue;
}
auto parent_profile_id = *element.parent_profile;
auto profile_id = *element.parent_profile;
element.parent_profile.reset();
if (already_substituted.count(parent_profile_id))
{
++i;
if (substituted_profiles_set.count(profile_id))
continue;
}
already_substituted.insert(parent_profile_id);
auto parent_profile = all_profiles.find(parent_profile_id);
if (parent_profile == all_profiles.end())
{
++i;
auto profile_it = all_profiles.find(profile_id);
if (profile_it == all_profiles.end())
continue;
}
const auto & parent_profile_elements = parent_profile->second->elements;
elements.insert(elements.begin() + i, parent_profile_elements.begin(), parent_profile_elements.end());
const auto & profile = profile_it->second;
const auto & profile_elements = profile->elements;
elements.insert(elements.begin() + i, profile_elements.begin(), profile_elements.end());
i += profile_elements.size();
substituted_profiles.push_back(profile_id);
substituted_profiles_set.insert(profile_id);
names_of_substituted_profiles.emplace(profile_id, profile->getName());
}
std::reverse(substituted_profiles.begin(), substituted_profiles.end());
}
std::shared_ptr<const EnabledSettings> SettingsProfilesCache::getEnabledSettings(
const UUID & user_id,
const SettingsProfileElements & settings_from_user,
@ -216,26 +215,26 @@ std::shared_ptr<const EnabledSettings> SettingsProfilesCache::getEnabledSettings
}
std::shared_ptr<const SettingsChanges> SettingsProfilesCache::getProfileSettings(const String & profile_name)
std::shared_ptr<const SettingsProfilesInfo> SettingsProfilesCache::getSettingsProfileInfo(const UUID & profile_id)
{
std::lock_guard lock{mutex};
ensureAllProfilesRead();
auto it = profiles_by_name.find(profile_name);
if (it == profiles_by_name.end())
throw Exception("Settings profile " + backQuote(profile_name) + " not found", ErrorCodes::THERE_IS_NO_PROFILE);
const UUID profile_id = it->second;
auto it2 = settings_for_profiles.find(profile_id);
if (it2 != settings_for_profiles.end())
return it2->second;
if (auto pos = this->profile_infos_cache.get(profile_id))
return *pos;
SettingsProfileElements elements = all_profiles[profile_id]->elements;
substituteProfiles(elements);
auto res = std::make_shared<const SettingsChanges>(elements.toSettingsChanges());
settings_for_profiles.emplace(profile_id, res);
return res;
auto info = std::make_shared<SettingsProfilesInfo>(manager);
info->profiles.push_back(profile_id);
info->profiles_with_implicit.push_back(profile_id);
substituteProfiles(elements, info->profiles_with_implicit, info->names_of_profiles);
info->settings = elements.toSettingsChanges();
info->constraints.merge(elements.toSettingsConstraints(manager));
profile_infos_cache.add(profile_id, info);
return info;
}
}

View File

@ -1,8 +1,7 @@
#pragma once
#include <Access/EnabledSettings.h>
#include <Core/UUID.h>
#include <common/types.h>
#include <Poco/LRUCache.h>
#include <common/scope_guard.h>
#include <map>
#include <unordered_map>
@ -13,9 +12,7 @@ namespace DB
class AccessControlManager;
struct SettingsProfile;
using SettingsProfilePtr = std::shared_ptr<const SettingsProfile>;
class SettingsProfileElements;
class EnabledSettings;
struct SettingsProfilesInfo;
/// Reads and caches all the settings profiles.
class SettingsProfilesCache
@ -32,7 +29,7 @@ public:
const boost::container::flat_set<UUID> & enabled_roles,
const SettingsProfileElements & settings_from_enabled_roles_);
std::shared_ptr<const SettingsChanges> getProfileSettings(const String & profile_name);
std::shared_ptr<const SettingsProfilesInfo> getSettingsProfileInfo(const UUID & profile_id);
private:
void ensureAllProfilesRead();
@ -40,7 +37,7 @@ private:
void profileRemoved(const UUID & profile_id);
void mergeSettingsAndConstraints();
void mergeSettingsAndConstraintsFor(EnabledSettings & enabled) const;
void substituteProfiles(SettingsProfileElements & elements) const;
void substituteProfiles(SettingsProfileElements & elements, std::vector<UUID> & substituted_profiles, std::unordered_map<UUID, String> & names_of_substituted_profiles) const;
const AccessControlManager & manager;
std::unordered_map<UUID, SettingsProfilePtr> all_profiles;
@ -49,7 +46,7 @@ private:
scope_guard subscription;
std::map<EnabledSettings::Params, std::weak_ptr<EnabledSettings>> enabled_settings;
std::optional<UUID> default_profile_id;
std::unordered_map<UUID, std::shared_ptr<const SettingsChanges>> settings_for_profiles;
Poco::LRUCache<UUID, std::shared_ptr<const SettingsProfilesInfo>> profile_infos_cache;
mutable std::mutex mutex;
};
}

View File

@ -0,0 +1,58 @@
#include <Access/SettingsProfilesInfo.h>
#include <Access/SettingsConstraintsAndProfileIDs.h>
#include <common/removeDuplicates.h>
namespace DB
{
bool operator==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs)
{
if (lhs.settings != rhs.settings)
return false;
if (lhs.constraints != rhs.constraints)
return false;
if (lhs.profiles != rhs.profiles)
return false;
if (lhs.profiles_with_implicit != rhs.profiles_with_implicit)
return false;
if (lhs.names_of_profiles != rhs.names_of_profiles)
return false;
return true;
}
std::shared_ptr<const SettingsConstraintsAndProfileIDs>
SettingsProfilesInfo::getConstraintsAndProfileIDs(const std::shared_ptr<const SettingsConstraintsAndProfileIDs> & previous) const
{
auto res = std::make_shared<SettingsConstraintsAndProfileIDs>(manager);
res->current_profiles = profiles;
if (previous)
{
res->constraints = previous->constraints;
res->constraints.merge(constraints);
}
else
res->constraints = constraints;
if (previous)
{
res->enabled_profiles.reserve(previous->enabled_profiles.size() + profiles_with_implicit.size());
res->enabled_profiles = previous->enabled_profiles;
}
res->enabled_profiles.insert(res->enabled_profiles.end(), profiles_with_implicit.begin(), profiles_with_implicit.end());
/// If some profile occurs multiple times (with some other settings in between),
/// the latest occurrence overrides all the previous ones.
removeDuplicatesKeepLast(res->current_profiles);
removeDuplicatesKeepLast(res->enabled_profiles);
return res;
}
}

View File

@ -0,0 +1,43 @@
#pragma once
#include <Access/SettingsConstraints.h>
#include <Common/SettingsChanges.h>
#include <Core/UUID.h>
#include <unordered_map>
namespace DB
{
struct SettingsConstraintsAndProfileIDs;
/// Information about the default settings which are applied to a user on login.
struct SettingsProfilesInfo
{
SettingsChanges settings;
SettingsConstraints constraints;
/// Profiles explicitly assigned to the user.
std::vector<UUID> profiles;
/// Profiles assigned to the user both explicitly and implicitly.
/// Implicitly assigned profiles include parent profiles of other assigned profiles,
/// profiles assigned via granted roles, profiles assigned via their own settings,
/// and the main default profile (see the section `default_profile` in the main configuration file).
/// The order of IDs in this vector corresponds to the order in which these profiles are applied.
std::vector<UUID> profiles_with_implicit;
/// Names of all the profiles in `profiles`.
std::unordered_map<UUID, String> names_of_profiles;
SettingsProfilesInfo(const AccessControlManager & manager_) : constraints(manager_), manager(manager_) {}
std::shared_ptr<const SettingsConstraintsAndProfileIDs> getConstraintsAndProfileIDs(
const std::shared_ptr<const SettingsConstraintsAndProfileIDs> & previous = nullptr) const;
friend bool operator ==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs);
friend bool operator !=(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs) { return !(lhs == rhs); }
private:
const AccessControlManager & manager;
};
}

View File

@ -45,6 +45,7 @@ SRCS(
SettingsProfilesCache.cpp
User.cpp
UsersConfigAccessStorage.cpp
tests/gtest_access_rights_ops.cpp
)

View File

@ -29,6 +29,20 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se
active_connection_count = 1;
}
MultiplexedConnections::MultiplexedConnections(std::shared_ptr<Connection> connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_), drain_timeout(settings.drain_timeout), receive_timeout(settings.receive_timeout)
, connection_ptr(connection_ptr_)
{
connection_ptr->setThrottler(throttler);
ReplicaState replica_state;
replica_state.connection = connection_ptr.get();
replica_states.push_back(replica_state);
active_connection_count = 1;
}
MultiplexedConnections::MultiplexedConnections(
std::vector<IConnectionPool::Entry> && connections, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_), drain_timeout(settings.drain_timeout), receive_timeout(settings.receive_timeout)

View File

@ -22,6 +22,8 @@ class MultiplexedConnections final : public IConnections
public:
/// Accepts a ready connection.
MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_);
/// Accepts a ready connection and keeps it alive until it is drained.
MultiplexedConnections(std::shared_ptr<Connection> connection_, const Settings & settings_, const ThrottlerPtr & throttler_);
/// Accepts a vector of connections to replicas of one shard already taken from the pool.
MultiplexedConnections(
@ -79,7 +81,6 @@ private:
/// Mark the replica as invalid.
void invalidateReplica(ReplicaState & replica_state);
private:
const Settings & settings;
/// The following two fields are from settings but can be referenced outside the lifetime of
@ -95,6 +96,8 @@ private:
/// Connection that received last block.
Connection * current_connection = nullptr;
/// Shared connection, may be empty. Used to keep object alive before draining.
std::shared_ptr<Connection> connection_ptr;
bool sent_query = false;
bool cancelled = false;
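The new constructor stores the connection as a `std::shared_ptr` member so the `Connection` outlives the caller's reference until draining finishes. A minimal sketch of this ownership pattern with toy types (not the real `Connection`/`MultiplexedConnections` API):

``` cpp
#include <iostream>
#include <memory>

struct ToyConnection
{
    ~ToyConnection() { std::cout << "connection destroyed\n"; }
    void drain() { std::cout << "draining\n"; }
};

struct ToyMultiplexedConnections
{
    /// Shared ownership keeps the connection alive until draining is done.
    std::shared_ptr<ToyConnection> connection_ptr;

    explicit ToyMultiplexedConnections(std::shared_ptr<ToyConnection> conn)
        : connection_ptr(std::move(conn)) {}

    void drainAndRelease()
    {
        connection_ptr->drain();
        connection_ptr.reset();   /// only now may the connection be destroyed
    }
};

int main()
{
    auto conn = std::make_shared<ToyConnection>();
    ToyMultiplexedConnections mux(conn);
    conn.reset();             /// the caller drops its reference...
    mux.drainAndRelease();    /// ...but the connection survives until drained
}
```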

View File

@ -109,7 +109,7 @@ class IColumn;
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 0, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
M(UInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \
M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \

View File

@ -1,44 +0,0 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
namespace DB
{
/** A stream of blocks that reads the next block from an explicitly provided list.
* Also see OneBlockInputStream.
*/
class BlocksListBlockInputStream : public IBlockInputStream
{
public:
/// Acquires the ownership of the block list.
BlocksListBlockInputStream(BlocksList && list_)
: list(std::move(list_)), it(list.begin()), end(list.end()) {}
/// Uses a list of blocks lying somewhere else.
BlocksListBlockInputStream(BlocksList::iterator & begin_, BlocksList::iterator & end_)
: it(begin_), end(end_) {}
String getName() const override { return "BlocksList"; }
protected:
Block getHeader() const override { return list.empty() ? Block() : *list.begin(); }
Block readImpl() override
{
if (it == end)
return Block();
Block res = *it;
++it;
return res;
}
private:
BlocksList list;
BlocksList::iterator it;
const BlocksList::iterator end;
};
}

View File

@ -1,46 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <DataStreams/IBlockInputStream.h>
#include <cstddef>
#include <memory>
namespace DB
{
struct BlockIO;
class Context;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
/** Prepares an input stream which produces the data contained in an INSERT query.
* The head of the inserted data can be stored directly in the INSERT AST;
* the remaining (tail) data can be stored in input_buffer_tail_part.
*/
class InputStreamFromASTInsertQuery : public IBlockInputStream
{
public:
InputStreamFromASTInsertQuery(
const ASTPtr & ast,
ReadBuffer * input_buffer_tail_part,
const Block & header,
ContextPtr context,
const ASTPtr & input_function);
Block readImpl() override { return res_stream->read(); }
void readPrefixImpl() override { return res_stream->readPrefix(); }
void readSuffixImpl() override { return res_stream->readSuffix(); }
String getName() const override { return "InputStreamFromASTInsertQuery"; }
Block getHeader() const override { return res_stream->getHeader(); }
private:
std::unique_ptr<ReadBuffer> input_buffer_ast_part;
std::unique_ptr<ReadBuffer> input_buffer_contacenated;
BlockInputStreamPtr res_stream;
};
}

View File

@ -1,158 +0,0 @@
#include <algorithm>
#include <DataStreams/LimitBlockInputStream.h>
namespace DB
{
/// Gets pointers to all columns of the block that were used for ORDER BY.
static ColumnRawPtrs extractSortColumns(const Block & block, const SortDescription & description)
{
size_t size = description.size();
ColumnRawPtrs res;
res.reserve(size);
for (size_t i = 0; i < size; ++i)
{
const IColumn * column = !description[i].column_name.empty()
? block.getByName(description[i].column_name).column.get()
: block.safeGetByPosition(description[i].column_number).column.get();
res.emplace_back(column);
}
return res;
}
LimitBlockInputStream::LimitBlockInputStream(
const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_, bool always_read_till_end_,
bool use_limit_as_total_rows_approx, bool with_ties_, const SortDescription & description_)
: limit(limit_), offset(offset_), always_read_till_end(always_read_till_end_), with_ties(with_ties_)
, description(description_)
{
if (use_limit_as_total_rows_approx)
{
addTotalRowsApprox(static_cast<size_t>(limit));
}
children.push_back(input);
}
Block LimitBlockInputStream::readImpl()
{
Block res;
UInt64 rows = 0;
/// pos >= offset + limit and all rows at the end of the previous block were equal
/// to the row at the 'limit' position. So we check the current block.
if (!ties_row_ref.empty() && pos >= offset + limit)
{
res = children.back()->read();
rows = res.rows();
if (!res)
return res;
SharedBlockPtr ptr = new detail::SharedBlock(std::move(res));
ptr->sort_columns = extractSortColumns(*ptr, description);
UInt64 len;
for (len = 0; len < rows; ++len)
{
SharedBlockRowRef current_row;
current_row.set(ptr, &ptr->sort_columns, len);
if (current_row != ties_row_ref)
{
ties_row_ref.reset();
break;
}
}
if (len < rows)
{
for (size_t i = 0; i < ptr->columns(); ++i)
ptr->safeGetByPosition(i).column = ptr->safeGetByPosition(i).column->cut(0, len);
}
return *ptr;
}
if (pos >= offset + limit)
{
if (!always_read_till_end)
return res;
else
{
while (children.back()->read())
;
return res;
}
}
do
{
res = children.back()->read();
if (!res)
return res;
rows = res.rows();
pos += rows;
} while (pos <= offset);
SharedBlockPtr ptr = new detail::SharedBlock(std::move(res));
if (with_ties)
ptr->sort_columns = extractSortColumns(*ptr, description);
/// give away the whole block
if (pos >= offset + rows && pos <= offset + limit)
{
/// Save rowref for the last row, because the next block probably begins with the same row.
if (with_ties && pos == offset + limit)
ties_row_ref.set(ptr, &ptr->sort_columns, rows - 1);
return *ptr;
}
/// give away a piece of the block
UInt64 start = std::max(
static_cast<Int64>(0),
static_cast<Int64>(offset) - static_cast<Int64>(pos) + static_cast<Int64>(rows));
UInt64 length = std::min(
static_cast<Int64>(limit), std::min(
static_cast<Int64>(pos) - static_cast<Int64>(offset),
static_cast<Int64>(limit) + static_cast<Int64>(offset) - static_cast<Int64>(pos) + static_cast<Int64>(rows)));
/// check if other rows in the current block are equal to the last one within the limit
if (with_ties)
{
ties_row_ref.set(ptr, &ptr->sort_columns, start + length - 1);
for (size_t i = ties_row_ref.row_num + 1; i < rows; ++i)
{
SharedBlockRowRef current_row;
current_row.set(ptr, &ptr->sort_columns, i);
if (current_row == ties_row_ref)
++length;
else
{
ties_row_ref.reset();
break;
}
}
}
if (length == rows)
return *ptr;
for (size_t i = 0; i < ptr->columns(); ++i)
ptr->safeGetByPosition(i).column = ptr->safeGetByPosition(i).column->cut(start, length);
// TODO: we should provide feedback to child-block, so it will know how many rows are actually consumed.
// It's crucial for streaming engines like Kafka.
return *ptr;
}
}

View File

@ -1,47 +0,0 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Common/SharedBlockRowRef.h>
#include <Core/SortDescription.h>
namespace DB
{
/** Implements the LIMIT relational operation.
*/
class LimitBlockInputStream : public IBlockInputStream
{
public:
/** If always_read_till_end = false (by default), then after reading enough data,
* returns an empty block, and this causes the query to be canceled.
* If always_read_till_end = true, reads all the data to the end, but ignores it. This is necessary in rare cases:
* when otherwise, due to the cancellation of the request, we would not have received the data for GROUP BY WITH TOTALS from the remote server.
* If use_limit_as_total_rows_approx = true, then addTotalRowsApprox is called to use the limit in progress & stats.
* with_ties = true when the query has a WITH TIES modifier; if so, description should be provided.
* description lets us know which row we should check for equality.
*/
LimitBlockInputStream(
const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_,
bool always_read_till_end_ = false, bool use_limit_as_total_rows_approx = false,
bool with_ties_ = false, const SortDescription & description_ = {});
String getName() const override { return "Limit"; }
Block getHeader() const override { return children.at(0)->getHeader(); }
protected:
Block readImpl() override;
private:
UInt64 limit;
UInt64 offset;
UInt64 pos = 0;
bool always_read_till_end;
bool with_ties;
const SortDescription description;
SharedBlockRowRef ties_row_ref;
};
}
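For reference, the WITH TIES behaviour this deleted stream implemented: after emitting `limit` rows, keep emitting rows as long as they equal the last emitted one. A minimal sketch over a single pre-sorted column (toy code, not the ClickHouse implementation, which compares full sort-key tuples via SharedBlockRowRef):

``` cpp
#include <iostream>
#include <vector>

/// LIMIT n WITH TIES over one sorted column: take n rows, then also take
/// every following row equal to the last row taken (the "ties").
std::vector<int> limitWithTies(const std::vector<int> & sorted, size_t limit)
{
    std::vector<int> out;
    for (size_t i = 0; i < sorted.size(); ++i)
    {
        if (i < limit || (!out.empty() && sorted[i] == out.back()))
            out.push_back(sorted[i]);
        else
            break;
    }
    return out;
}

int main()
{
    std::vector<int> sorted{10, 20, 20, 20, 30};
    for (int x : limitWithTies(sorted, 2))
        std::cout << x << ' ';   /// prints: 10 20 20 20
    std::cout << '\n';
}
```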

View File

@ -1,273 +0,0 @@
#include <queue>
#include <common/logger_useful.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <DataStreams/ColumnGathererStream.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
MergingSortedBlockInputStream::MergingSortedBlockInputStream(
const BlockInputStreams & inputs_, SortDescription description_,
size_t max_block_size_, UInt64 limit_, WriteBuffer * out_row_sources_buf_, bool quiet_)
: description(std::move(description_)), max_block_size(max_block_size_), limit(limit_), quiet(quiet_)
, source_blocks(inputs_.size())
, cursors(inputs_.size()), out_row_sources_buf(out_row_sources_buf_)
, log(&Poco::Logger::get("MergingSortedBlockInputStream"))
{
children.insert(children.end(), inputs_.begin(), inputs_.end());
header = children.at(0)->getHeader();
num_columns = header.columns();
}
void MergingSortedBlockInputStream::init(MutableColumns & merged_columns)
{
/// Read the first blocks, initialize the queue.
if (first)
{
first = false;
for (size_t i = 0; i < source_blocks.size(); ++i)
{
Block & block = source_blocks[i];
if (block)
continue;
block = children[i]->read();
const size_t rows = block.rows();
if (rows == 0)
continue;
if (expected_block_size < rows)
expected_block_size = std::min(rows, max_block_size);
cursors[i] = SortCursorImpl(block, description, i);
has_collation |= cursors[i].has_collation;
}
if (has_collation)
queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
else
queue_without_collation = SortingHeap<SortCursor>(cursors);
}
/// Let's check that all source blocks have the same structure.
for (const auto & block : source_blocks)
{
if (!block)
continue;
assertBlocksHaveEqualStructure(block, header, getName());
}
merged_columns.resize(num_columns);
for (size_t i = 0; i < num_columns; ++i)
{
merged_columns[i] = header.safeGetByPosition(i).column->cloneEmpty();
merged_columns[i]->reserve(expected_block_size);
}
}
Block MergingSortedBlockInputStream::readImpl()
{
if (finished)
return {};
if (children.size() == 1)
return children[0]->read();
MutableColumns merged_columns;
init(merged_columns);
if (merged_columns.empty())
return {};
if (has_collation)
merge(merged_columns, queue_with_collation);
else
merge(merged_columns, queue_without_collation);
return header.cloneWithColumns(std::move(merged_columns));
}
template <typename TSortCursor>
void MergingSortedBlockInputStream::fetchNextBlock(const TSortCursor & current, SortingHeap<TSortCursor> & queue)
{
size_t order = current->order;
size_t size = cursors.size();
if (order >= size || &cursors[order] != current.impl)
throw Exception("Logical error in MergingSortedBlockInputStream", ErrorCodes::LOGICAL_ERROR);
while (true)
{
source_blocks[order] = children[order]->read();
if (!source_blocks[order])
{
queue.removeTop();
break;
}
if (source_blocks[order].rows())
{
cursors[order].reset(source_blocks[order]);
queue.replaceTop(&cursors[order]);
break;
}
}
}
template <typename TSortingHeap>
void MergingSortedBlockInputStream::merge(MutableColumns & merged_columns, TSortingHeap & queue)
{
size_t merged_rows = 0;
/** Increase row counters.
* Return true if it's time to finish generating the current data block.
*/
auto count_row_and_check_limit = [&, this]()
{
++total_merged_rows;
if (limit && total_merged_rows == limit)
{
// std::cerr << "Limit reached\n";
cancel(false);
finished = true;
return true;
}
++merged_rows;
return merged_rows >= max_block_size;
};
/// Take rows in the required order and put them into `merged_columns`, while the number of rows is no more than `max_block_size`
while (queue.isValid())
{
auto current = queue.current();
/** What if the current cursor's block is entirely less than or equal to the rest?
 * Or is there only one data source left in the queue? Then we can take the whole block of the current cursor.
 */
if (current->isFirst()
&& (queue.size() == 1
|| (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild()))))
{
// std::cerr << "current block is totally less or equals\n";
/// If there is already data in the current block, return it first. We'll get here again on the next call to the merge function.
if (merged_rows != 0)
{
//std::cerr << "merged rows is non-zero\n";
return;
}
/// Actually, current->order stores source number (i.e. cursors[current->order] == current)
size_t source_num = current->order;
if (source_num >= cursors.size())
throw Exception("Logical error in MergingSortedBlockInputStream", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < num_columns; ++i)
merged_columns[i] = IColumn::mutate(std::move(source_blocks[source_num].getByPosition(i).column));
// std::cerr << "copied columns\n";
merged_rows = merged_columns.at(0)->size();
/// Limit output
if (limit && total_merged_rows + merged_rows > limit)
{
merged_rows = limit - total_merged_rows;
for (size_t i = 0; i < num_columns; ++i)
{
auto & column = merged_columns[i];
column = IColumn::mutate(column->cut(0, merged_rows));
}
cancel(false);
finished = true;
}
/// Write the order of rows for the other columns;
/// this data will be used in the gather stream.
if (out_row_sources_buf)
{
RowSourcePart row_source(source_num);
for (size_t i = 0; i < merged_rows; ++i)
out_row_sources_buf->write(row_source.data);
}
//std::cerr << "fetching next block\n";
total_merged_rows += merged_rows;
fetchNextBlock(current, queue);
return;
}
// std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n";
// std::cerr << "Inserting row\n";
for (size_t i = 0; i < num_columns; ++i)
merged_columns[i]->insertFrom(*current->all_columns[i], current->getRow());
if (out_row_sources_buf)
{
/// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
RowSourcePart row_source(current->order);
out_row_sources_buf->write(row_source.data);
}
if (!current->isLast())
{
// std::cerr << "moving to next row\n";
queue.next();
}
else
{
/// We get the next block from the corresponding source, if there is one.
// std::cerr << "It was last row, fetching next block\n";
fetchNextBlock(current, queue);
}
if (count_row_and_check_limit())
return;
}
/// We have read all data. Ask children to cancel providing more data.
cancel(false);
finished = true;
}
void MergingSortedBlockInputStream::readSuffixImpl()
{
if (quiet)
return;
const BlockStreamProfileInfo & profile_info = getProfileInfo();
double seconds = profile_info.total_stopwatch.elapsedSeconds();
if (!seconds)
LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in 0 sec.", profile_info.blocks, profile_info.rows);
else
LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in {} sec., {} rows/sec., {}/sec",
profile_info.blocks, profile_info.rows, seconds,
profile_info.rows / seconds,
ReadableSize(profile_info.bytes / seconds));
}
}
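The merge loop above is a classic k-way merge over a heap of cursors, plus two optimizations: taking a whole block when one cursor's block is entirely less than or equal to the rest, and cutting the output at `limit`. A standalone sketch of just the heap part (the real code uses SortCursorImpl and SortingHeap over blocks, and refills a cursor from its child stream when it runs out):

#include <cstddef>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

/// Standalone k-way merge: a min-heap of (value, input index) cursors;
/// pop the smallest, emit it, then advance that input's cursor.
std::vector<int> mergeSorted(const std::vector<std::vector<int>> & inputs)
{
    using Cursor = std::pair<int, size_t>;
    std::priority_queue<Cursor, std::vector<Cursor>, std::greater<>> heap;
    std::vector<size_t> pos(inputs.size(), 0);

    for (size_t i = 0; i < inputs.size(); ++i)
        if (!inputs[i].empty())
            heap.push({inputs[i][0], i});

    std::vector<int> merged;
    while (!heap.empty())
    {
        auto [value, i] = heap.top();
        heap.pop();
        merged.push_back(value);
        if (++pos[i] < inputs[i].size())
            heap.push({inputs[i][pos[i]], i}); /// refill from the same input
    }
    return merged;
}

int main()
{
    for (int x : mergeSorted({{1, 4, 7}, {2, 5}, {3, 6}}))
        std::cout << x << ' '; /// prints: 1 2 3 4 5 6 7
}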

View File

@ -1,87 +0,0 @@
#pragma once
#include <Core/SortDescription.h>
#include <Core/SortCursor.h>
#include <IO/WriteHelpers.h>
#include <DataStreams/IBlockInputStream.h>
namespace Poco { class Logger; }
namespace DB
{
/** Merges several sorted streams into one sorted stream.
*/
class MergingSortedBlockInputStream : public IBlockInputStream
{
public:
/** limit - if it isn't 0, then we can produce only the first `limit` rows in sorted order.
 * out_row_sources - if it isn't nullptr, then at the end of execution it should contain the part number of each read row (and the needed flag).
 * quiet - don't log profiling info.
*/
MergingSortedBlockInputStream(
const BlockInputStreams & inputs_, SortDescription description_, size_t max_block_size_,
UInt64 limit_ = 0, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false);
String getName() const override { return "MergingSorted"; }
Block getHeader() const override { return header; }
protected:
Block readImpl() override;
void readSuffixImpl() override;
/// Initializes the queue and the columns of next result block.
void init(MutableColumns & merged_columns);
/// Gets the next block from the source corresponding to the `current`.
template <typename TSortCursor>
void fetchNextBlock(const TSortCursor & current, SortingHeap<TSortCursor> & queue);
Block header;
const SortDescription description;
const size_t max_block_size;
UInt64 limit;
UInt64 total_merged_rows = 0;
bool first = true;
bool has_collation = false;
bool quiet = false;
/// May be smaller than or equal to max_block_size. Used to 'reserve' space for the columns.
size_t expected_block_size = 0;
/// Blocks currently being merged.
size_t num_columns = 0;
Blocks source_blocks;
SortCursorImpls cursors;
SortingHeap<SortCursor> queue_without_collation;
SortingHeap<SortCursorWithCollation> queue_with_collation;
/// Used in the Vertical merge algorithm to gather non-PK/non-index columns (at the next step).
/// If it is not nullptr, it should be populated during execution.
WriteBuffer * out_row_sources_buf;
private:
/** We support two different cursors - with Collation and without.
* Templates are used instead of polymorphic SortCursor and calls to virtual functions.
*/
template <typename TSortingHeap>
void merge(MutableColumns & merged_columns, TSortingHeap & queue);
Poco::Logger * log;
/// Read is finished.
bool finished = false;
};
}

View File

@ -34,13 +34,20 @@ namespace ErrorCodes
extern const int DUPLICATED_PART_UUIDS;
}
RemoteQueryExecutor::RemoteQueryExecutor(
const String & query_, const Block & header_, ContextPtr context_,
const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_), scalars(scalars_)
, external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
{}
RemoteQueryExecutor::RemoteQueryExecutor(
Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), sync_draining(true)
: RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_)
{
create_connections = [this, &connection, throttler]()
{
@ -48,6 +55,19 @@ RemoteQueryExecutor::RemoteQueryExecutor(
};
}
RemoteQueryExecutor::RemoteQueryExecutor(
std::shared_ptr<Connection> connection_ptr,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_)
{
create_connections = [this, connection_ptr, throttler]()
{
return std::make_shared<MultiplexedConnections>(connection_ptr, context->getSettingsRef(), throttler);
};
}
RemoteQueryExecutor::RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool_,
std::vector<IConnectionPool::Entry> && connections_,

View File

@ -43,6 +43,13 @@ public:
ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
/// Takes already set connection.
RemoteQueryExecutor(
std::shared_ptr<Connection> connection,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
/// Accepts several connections already taken from pool.
RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool,
@ -105,6 +112,11 @@ public:
const Block & getHeader() const { return header; }
private:
RemoteQueryExecutor(
const String & query_, const Block & header_, ContextPtr context_,
const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_);
Block header;
Block totals;
Block extremes;
@ -124,9 +136,6 @@ private:
/// Initiator identifier for distributed task processing
std::shared_ptr<TaskIterator> task_iterator;
/// Drain connection synchronously when finishing.
bool sync_draining = false;
std::function<std::shared_ptr<IConnections>()> create_connections;
/// Hold a shared reference to the connection pool so that asynchronous connection draining will
/// work safely. Make sure it's the first member so that we don't destruct it too early.

View File

@ -4,6 +4,9 @@
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/ISource.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/ReadBufferFromFile.h>
@ -32,32 +35,38 @@ struct TemporaryFileStream
{}
/// Flush data from the input pipeline into a file for future reading
static void write(const std::string & path, const Block & header, IBlockInputStream & input,
std::atomic<bool> * is_cancelled, const std::string & codec)
static void write(const std::string & path, const Block & header, QueryPipeline pipeline, const std::string & codec)
{
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf, CompressionCodecFactory::instance().get(codec, {}));
NativeBlockOutputStream output(compressed_buf, 0, header);
copyData(input, output, is_cancelled);
PullingPipelineExecutor executor(pipeline);
output.writePrefix();
Block block;
while (executor.pull(block))
output.write(block);
output.writeSuffix();
compressed_buf.finalize();
}
};
class TemporaryFileLazyInputStream : public IBlockInputStream
class TemporaryFileLazySource : public ISource
{
public:
TemporaryFileLazyInputStream(const std::string & path_, const Block & header_)
: path(path_)
, header(header_)
TemporaryFileLazySource(const std::string & path_, const Block & header_)
: ISource(header_)
, path(path_)
, done(false)
{}
String getName() const override { return "TemporaryFile"; }
Block getHeader() const override { return header; }
void readSuffix() override {}
String getName() const override { return "TemporaryFileLazySource"; }
protected:
Block readImpl() override
Chunk generate() override
{
if (done)
return {};
@ -71,7 +80,7 @@ protected:
done = true;
stream.reset();
}
return block;
return Chunk(block.getColumns(), block.rows());
}
private:

View File

@ -1,11 +1,10 @@
#include <gtest/gtest.h>
#include <Core/Block.h>
#include <Columns/ColumnVector.h>
#include <DataStreams/BlocksListBlockInputStream.h>
#include <Processors/Sources/BlocksListSource.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <Processors/Pipe.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
#include <Processors/QueryPipeline.h>
@ -40,7 +39,7 @@ static Pipe getInputStreams(const std::vector<std::string> & column_names, const
size_t start = stride;
while (blocks_count--)
blocks.push_back(getBlockWithSize(column_names, block_size_in_bytes, stride, start));
pipes.emplace_back(std::make_shared<SourceFromInputStream>(std::make_shared<BlocksListBlockInputStream>(std::move(blocks))));
pipes.emplace_back(std::make_shared<BlocksListSource>(std::move(blocks)));
}
return Pipe::unitePipes(std::move(pipes));
@ -57,7 +56,7 @@ static Pipe getInputStreamsEqualStride(const std::vector<std::string> & column_n
size_t start = i;
while (blocks_count--)
blocks.push_back(getBlockWithSize(column_names, block_size_in_bytes, stride, start));
pipes.emplace_back(std::make_shared<SourceFromInputStream>(std::make_shared<BlocksListBlockInputStream>(std::move(blocks))));
pipes.emplace_back(std::make_shared<BlocksListSource>(std::move(blocks)));
i++;
}
return Pipe::unitePipes(std::move(pipes));

View File

@ -2,8 +2,10 @@
#include <gtest/gtest.h>
#include <Columns/ColumnsNumber.h>
#include <DataStreams/BlocksListBlockInputStream.h>
#include <DataStreams/CheckSortedBlockInputStream.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <DataTypes/DataTypesNumber.h>
@ -89,14 +91,22 @@ TEST(CheckSortedBlockInputStream, CheckGoodCase)
for (size_t i = 0; i < 3; ++i)
blocks.push_back(getSortedBlockWithSize(key_columns, 10, 1, i * 10));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
EXPECT_NO_THROW(sorted.read());
EXPECT_NO_THROW(sorted.read());
EXPECT_NO_THROW(sorted.read());
EXPECT_EQ(sorted.read(), Block());
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_FALSE(executor.pull(chunk));
}
TEST(CheckSortedBlockInputStream, CheckBadLastRow)
@ -109,14 +119,21 @@ TEST(CheckSortedBlockInputStream, CheckBadLastRow)
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 0));
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 300));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
EXPECT_NO_THROW(sorted.read());
EXPECT_NO_THROW(sorted.read());
EXPECT_THROW(sorted.read(), DB::Exception);
Chunk chunk;
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_THROW(executor.pull(chunk), DB::Exception);
}
@ -127,11 +144,19 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock1)
BlocksList blocks;
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 5, 1, 77));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
EXPECT_THROW(sorted.read(), DB::Exception);
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
EXPECT_THROW(executor.pull(chunk), DB::Exception);
}
TEST(CheckSortedBlockInputStream, CheckUnsortedBlock2)
@ -141,11 +166,19 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock2)
BlocksList blocks;
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 99, 2, 77));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
EXPECT_THROW(sorted.read(), DB::Exception);
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
EXPECT_THROW(executor.pull(chunk), DB::Exception);
}
TEST(CheckSortedBlockInputStream, CheckUnsortedBlock3)
@ -155,11 +188,19 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock3)
BlocksList blocks;
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 50, 0, 77));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
EXPECT_THROW(sorted.read(), DB::Exception);
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
EXPECT_THROW(executor.pull(chunk), DB::Exception);
}
TEST(CheckSortedBlockInputStream, CheckEqualBlock)
@ -171,11 +212,19 @@ TEST(CheckSortedBlockInputStream, CheckEqualBlock)
blocks.push_back(getEqualValuesBlockWithSize(key_columns, 10));
blocks.push_back(getEqualValuesBlockWithSize(key_columns, 1));
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
Pipe pipe(std::make_shared<BlocksListSource>(std::move(blocks)));
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
CheckSortedBlockInputStream sorted(stream, sort_description);
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
EXPECT_NO_THROW(sorted.read());
EXPECT_NO_THROW(sorted.read());
EXPECT_NO_THROW(sorted.read());
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
}
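These tests were ported from the old pattern, where stream.read() returns an empty Block at end-of-stream, to PullingPipelineExecutor, where pull() returns false instead. A standalone sketch of that pull contract (hypothetical types, not the real executor API):

#include <functional>
#include <iostream>
#include <optional>
#include <vector>

using Chunk = std::vector<int>;

/// Wraps a chunk producer; pull() fills the out-parameter and returns true,
/// or returns false once the source is exhausted.
struct PullingExecutor
{
    std::function<std::optional<Chunk>()> source;

    bool pull(Chunk & chunk)
    {
        if (auto next = source())
        {
            chunk = std::move(*next);
            return true;
        }
        return false; /// end of stream, no empty-block sentinel needed
    }
};

int main()
{
    int produced = 0;
    PullingExecutor executor{[&]() -> std::optional<Chunk>
    {
        if (produced == 3)
            return std::nullopt;      /// exhausted after three chunks
        return Chunk(10, produced++); /// a chunk of ten rows
    }};

    Chunk chunk;
    while (executor.pull(chunk))
        std::cout << "chunk of " << chunk.size() << " rows\n";
}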

View File

@ -14,12 +14,10 @@ NO_COMPILER_WARNINGS()
SRCS(
AddingDefaultBlockOutputStream.cpp
AddingDefaultsBlockInputStream.cpp
AsynchronousBlockInputStream.cpp
BlockIO.cpp
BlockStreamProfileInfo.cpp
CheckConstraintsBlockOutputStream.cpp
CheckSortedBlockInputStream.cpp
ColumnGathererStream.cpp
ConvertingBlockInputStream.cpp
CountingBlockOutputStream.cpp
@ -28,11 +26,8 @@ SRCS(
ExpressionBlockInputStream.cpp
IBlockInputStream.cpp
ITTLAlgorithm.cpp
InputStreamFromASTInsertQuery.cpp
InternalTextLogsRowOutputStream.cpp
LimitBlockInputStream.cpp
MaterializingBlockInputStream.cpp
MergingSortedBlockInputStream.cpp
MongoDBBlockInputStream.cpp
NativeBlockInputStream.cpp
NativeBlockOutputStream.cpp

View File

@ -0,0 +1,88 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Access/AccessControlManager.h>
#include <Access/User.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
namespace DB
{
namespace
{
enum class Kind
{
CURRENT_PROFILES,
ENABLED_PROFILES,
DEFAULT_PROFILES,
};
template <Kind kind>
class FunctionCurrentProfiles : public IFunction
{
public:
static constexpr auto name = (kind == Kind::CURRENT_PROFILES) ? "currentProfiles" : ((kind == Kind::ENABLED_PROFILES) ? "enabledProfiles" : "defaultProfiles");
static FunctionPtr create(const ContextPtr & context) { return std::make_shared<FunctionCurrentProfiles>(context); }
String getName() const override { return name; }
explicit FunctionCurrentProfiles(const ContextPtr & context)
{
const auto & manager = context->getAccessControlManager();
std::vector<UUID> profile_ids;
if constexpr (kind == Kind::CURRENT_PROFILES)
{
profile_ids = context->getCurrentProfiles();
}
else if constexpr (kind == Kind::ENABLED_PROFILES)
{
profile_ids = context->getEnabledProfiles();
}
else
{
static_assert(kind == Kind::DEFAULT_PROFILES);
if (auto user = context->getUser())
profile_ids = user->settings.toProfileIDs();
}
profile_names = manager.tryReadNames(profile_ids);
}
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministic() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
auto col_res = ColumnArray::create(ColumnString::create());
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
for (const String & profile_name : profile_names)
res_strings.insertData(profile_name.data(), profile_name.length());
res_offsets.push_back(res_strings.size());
return ColumnConst::create(std::move(col_res), input_rows_count);
}
private:
Strings profile_names;
};
}
void registerFunctionCurrentProfiles(FunctionFactory & factory)
{
factory.registerFunction<FunctionCurrentProfiles<Kind::CURRENT_PROFILES>>();
factory.registerFunction<FunctionCurrentProfiles<Kind::ENABLED_PROFILES>>();
factory.registerFunction<FunctionCurrentProfiles<Kind::DEFAULT_PROFILES>>();
}
}
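The single class template parameterized by `Kind` avoids three near-identical classes: the function name and the profile-ID source are both resolved at compile time via constexpr expressions and `if constexpr`. A standalone sketch of that dispatch pattern (simplified, without the Access machinery):

#include <iostream>

enum class Kind
{
    CURRENT,
    ENABLED,
    DEFAULT,
};

template <Kind kind>
struct Function
{
    /// Resolved at compile time; each instantiation gets its own name.
    static constexpr auto name =
        (kind == Kind::CURRENT) ? "currentProfiles"
                                : ((kind == Kind::ENABLED) ? "enabledProfiles" : "defaultProfiles");

    static const char * source()
    {
        if constexpr (kind == Kind::CURRENT)
            return "context->getCurrentProfiles()";
        else if constexpr (kind == Kind::ENABLED)
            return "context->getEnabledProfiles()";
        else
            return "user->settings.toProfileIDs()";
    }
};

int main()
{
    std::cout << Function<Kind::CURRENT>::name << " reads " << Function<Kind::CURRENT>::source() << '\n';
    std::cout << Function<Kind::DEFAULT>::name << " reads " << Function<Kind::DEFAULT>::source() << '\n';
}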

View File

@ -0,0 +1,44 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeString.h>
#include <Core/Field.h>
namespace DB
{
class FunctionInitialQueryID : public IFunction
{
const String initial_query_id;
public:
static constexpr auto name = "initialQueryID";
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionInitialQueryID>(context->getClientInfo().initial_query_id);
}
explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {}
inline String getName() const override { return name; }
inline size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeString>();
}
inline bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
return DataTypeString().createColumnConst(input_rows_count, initial_query_id);
}
};
void registerFunctionInitialQueryID(FunctionFactory & factory)
{
factory.registerFunction<FunctionInitialQueryID>();
factory.registerAlias("initial_query_id", FunctionInitialQueryID::name, FunctionFactory::CaseInsensitive);
}
}

src/Functions/queryID.cpp Normal file
View File

@ -0,0 +1,44 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeString.h>
#include <Core/Field.h>
namespace DB
{
class FunctionQueryID : public IFunction
{
const String query_id;
public:
static constexpr auto name = "queryID";
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionQueryID>(context->getClientInfo().current_query_id);
}
explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {}
inline String getName() const override { return name; }
inline size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeString>();
}
inline bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
return DataTypeString().createColumnConst(input_rows_count, query_id)->convertToFullColumnIfConst();
}
};
void registerFunctionQueryID(FunctionFactory & factory)
{
factory.registerFunction<FunctionQueryID>();
factory.registerAlias("query_id", FunctionQueryID::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -9,6 +9,7 @@ class FunctionFactory;
void registerFunctionCurrentDatabase(FunctionFactory &);
void registerFunctionCurrentUser(FunctionFactory &);
void registerFunctionCurrentProfiles(FunctionFactory &);
void registerFunctionHostName(FunctionFactory &);
void registerFunctionFQDN(FunctionFactory &);
void registerFunctionVisibleWidth(FunctionFactory &);
@ -74,6 +75,8 @@ void registerFunctionFile(FunctionFactory & factory);
void registerFunctionConnectionId(FunctionFactory & factory);
void registerFunctionPartitionId(FunctionFactory & factory);
void registerFunctionIsIPAddressContainedIn(FunctionFactory &);
void registerFunctionQueryID(FunctionFactory & factory);
void registerFunctionInitialQueryID(FunctionFactory & factory);
#if USE_ICU
void registerFunctionConvertCharset(FunctionFactory &);
@ -83,6 +86,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
{
registerFunctionCurrentDatabase(factory);
registerFunctionCurrentUser(factory);
registerFunctionCurrentProfiles(factory);
registerFunctionHostName(factory);
registerFunctionFQDN(factory);
registerFunctionVisibleWidth(factory);
@ -148,6 +152,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionConnectionId(factory);
registerFunctionPartitionId(factory);
registerFunctionIsIPAddressContainedIn(factory);
registerFunctionQueryID(factory);
registerFunctionInitialQueryID(factory);
#if USE_ICU
registerFunctionConvertCharset(factory);

src/IO/OpenedFile.cpp Normal file
View File

@ -0,0 +1,67 @@
#include <unistd.h>
#include <fcntl.h>
#include <Common/ProfileEvents.h>
#include <Common/Exception.h>
#include <IO/OpenedFile.h>
namespace ProfileEvents
{
extern const Event FileOpen;
}
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_CLOSE_FILE;
}
void OpenedFile::open(int flags)
{
ProfileEvents::increment(ProfileEvents::FileOpen);
fd = ::open(file_name.c_str(), (flags == -1 ? 0 : flags) | O_RDONLY | O_CLOEXEC);
if (-1 == fd)
throwFromErrnoWithPath("Cannot open file " + file_name, file_name,
errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
}
std::string OpenedFile::getFileName() const
{
return file_name;
}
OpenedFile::OpenedFile(const std::string & file_name_, int flags)
: file_name(file_name_)
{
open(flags);
}
OpenedFile::~OpenedFile()
{
if (fd != -1)
close(); /// Exceptions will lead to std::terminate and that's Ok.
}
void OpenedFile::close()
{
if (0 != ::close(fd))
throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
fd = -1;
metric_increment.destroy();
}
}

src/IO/OpenedFile.h Normal file
View File

@ -0,0 +1,39 @@
#pragma once
#include <Common/CurrentMetrics.h>
#include <memory>
namespace CurrentMetrics
{
extern const Metric OpenFileForRead;
}
namespace DB
{
/// RAII wrapper for a file descriptor opened read-only.
class OpenedFile
{
public:
OpenedFile(const std::string & file_name_, int flags);
~OpenedFile();
/// Close prematurely.
void close();
int getFD() const { return fd; }
std::string getFileName() const;
private:
std::string file_name;
int fd = -1;
CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
void open(int flags);
};
}

src/IO/OpenedFileCache.h Normal file
View File

@ -0,0 +1,74 @@
#pragma once
#include <map>
#include <mutex>
#include <Core/Types.h>
#include <Common/ProfileEvents.h>
#include <IO/OpenedFile.h>
namespace ProfileEvents
{
extern const Event OpenedFileCacheHits;
extern const Event OpenedFileCacheMisses;
}
namespace DB
{
/** Cache of opened files for reading.
* It allows sharing file descriptors when doing reads with 'pread' syscalls on read-only files.
* Note: opening/closing files is very cheap on Linux and we should not bother doing it 10 000 times a second.
* (This may not be the case on Windows with WSL. It is also not the case if strace is active, nor when some eBPF is loaded.)
* But sometimes we may end up opening one file multiple times, which increases the chance of exhausting the open-files limit.
*/
class OpenedFileCache
{
private:
using Key = std::pair<std::string /* path */, int /* flags */>;
using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
using Files = std::map<Key, OpenedFileWeakPtr>;
Files files;
std::mutex mutex;
public:
using OpenedFilePtr = std::shared_ptr<OpenedFile>;
OpenedFilePtr get(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
if (!inserted)
if (auto res = it->second.lock())
return res;
OpenedFilePtr res
{
new OpenedFile(path, flags),
[key, this](auto ptr)
{
{
std::lock_guard another_lock(mutex);
files.erase(key);
}
delete ptr;
}
};
it->second = res;
return res;
}
};
using OpenedFileCachePtr = std::shared_ptr<OpenedFileCache>;
}
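The cache holds weak_ptr values, so an entry lives exactly as long as some reader holds the shared_ptr, and the custom deleter unregisters the key under the same mutex before closing. A reduced standalone sketch of that lifetime scheme (an int stands in for the OpenedFile; note the deleter must not outlive the cache itself):

#include <cassert>
#include <map>
#include <memory>
#include <mutex>
#include <string>

/// Entries are weak: the cached object dies with its last user, and the
/// shared_ptr's deleter removes the key from the map on the way out.
struct FileCache
{
    std::map<std::string, std::weak_ptr<int>> entries;
    std::mutex mutex;

    std::shared_ptr<int> get(const std::string & key)
    {
        std::lock_guard lock(mutex);
        if (auto existing = entries[key].lock())
            return existing; /// share the still-open entry

        std::shared_ptr<int> res(new int(42), [key, this](int * fd)
        {
            {
                std::lock_guard another_lock(mutex); /// unregister on last release
                entries.erase(key);
            }
            delete fd;
        });
        entries[key] = res;
        return res;
    }
};

int main()
{
    FileCache cache;
    auto a = cache.get("/tmp/data.bin");
    auto b = cache.get("/tmp/data.bin");
    assert(a.get() == b.get()); /// one underlying "descriptor", shared
}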

View File

@ -88,4 +88,7 @@ void ReadBufferFromFile::close()
metric_increment.destroy();
}
OpenedFileCache ReadBufferFromFilePReadWithCache::cache;
}

View File

@ -1,12 +1,14 @@
#pragma once
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/OpenedFileCache.h>
#include <Common/CurrentMetrics.h>
#ifndef O_DIRECT
#define O_DIRECT 00040000
#endif
namespace CurrentMetrics
{
extern const Metric OpenFileForRead;
@ -60,4 +62,31 @@ public:
}
};
/** Similar to ReadBufferFromFilePRead but also transparently shares open file descriptors.
*/
class ReadBufferFromFilePReadWithCache : public ReadBufferFromFileDescriptorPRead
{
private:
static OpenedFileCache cache;
std::string file_name;
OpenedFileCache::OpenedFilePtr file;
public:
ReadBufferFromFilePReadWithCache(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment),
file_name(file_name_)
{
file = cache.get(file_name, flags);
fd = file->getFD();
}
std::string getFileName() const override
{
return file_name;
}
};
}

View File

@ -661,7 +661,7 @@ namespace S3
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
if (bucket.length() < 3 || bucket.length() > 63)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key name is empty in path style S3 URI: {} ({})", quoteString(key), uri.toString());
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {} ({})", quoteString(bucket), uri.toString());
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI: {}", uri.toString());

View File

@ -75,7 +75,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
/// Attempt to open a file with O_DIRECT
try
{
auto res = std::make_unique<ReadBufferFromFile>(
auto res = std::make_unique<ReadBufferFromFilePReadWithCache>(
filename, buffer_size, (flags == -1 ? O_RDONLY | O_CLOEXEC : flags) | O_DIRECT, existing_memory, alignment);
ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIO);
return res;
@ -92,7 +92,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
#endif
ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary);
return std::make_unique<ReadBufferFromFile>(filename, buffer_size, flags, existing_memory, alignment);
return std::make_unique<ReadBufferFromFilePReadWithCache>(filename, buffer_size, flags, existing_memory, alignment);
}
}
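createReadBufferFromFileBase now returns the FD-sharing buffer in both branches; the surrounding logic is still "try O_DIRECT first, fall back to an ordinary buffered read on failure". A standalone sketch of that fallback policy at the open() level (simplified; the real code decides per settings and estimated read size, catches the failure when constructing the buffer, and counts profile events):

#include <fcntl.h>
#include <unistd.h>
#include <string>

#ifndef O_DIRECT
#define O_DIRECT 00040000
#endif

/// Try direct I/O first; if the kernel or filesystem refuses it,
/// fall back to an ordinary buffered open.
int openPreferDirect(const std::string & path)
{
    int fd = ::open(path.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT);
    if (fd != -1)
        return fd;
    return ::open(path.c_str(), O_RDONLY | O_CLOEXEC);
}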

View File

@ -293,14 +293,14 @@ Aggregator::Aggregator(const Params & params_)
aggregation_state_cache = AggregatedDataVariants::createCache(method_chosen, cache_settings);
#if USE_EMBEDDED_COMPILER
compileAggregateFunctions();
compileAggregateFunctionsIfNeeded();
#endif
}
#if USE_EMBEDDED_COMPILER
void Aggregator::compileAggregateFunctions()
void Aggregator::compileAggregateFunctionsIfNeeded()
{
static std::unordered_map<UInt128, UInt64, UInt128Hash> aggregate_functions_description_to_count;
static std::mutex mtx;
@ -362,7 +362,7 @@ void Aggregator::compileAggregateFunctions()
{
LOG_TRACE(log, "Compile expression {}", functions_description);
auto compiled_aggregate_functions = compileAggregateFunctons(getJITInstance(), functions_to_compile, functions_description);
auto compiled_aggregate_functions = compileAggregateFunctions(getJITInstance(), functions_to_compile, functions_description);
return std::make_shared<CompiledAggregateFunctionsHolder>(std::move(compiled_aggregate_functions));
});
@ -371,7 +371,7 @@ void Aggregator::compileAggregateFunctions()
else
{
LOG_TRACE(log, "Compile expression {}", functions_description);
auto compiled_aggregate_functions = compileAggregateFunctons(getJITInstance(), functions_to_compile, functions_description);
auto compiled_aggregate_functions = compileAggregateFunctions(getJITInstance(), functions_to_compile, functions_description);
compiled_aggregate_functions_holder = std::make_shared<CompiledAggregateFunctionsHolder>(std::move(compiled_aggregate_functions));
}
}
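The rename to compileAggregateFunctionsIfNeeded matches what the body does: a static map guarded by a mutex memoizes compilation per unique functions description, so each combination is compiled once and reused afterwards. A standalone sketch of that compile-once core (simplified; the real code keys on a 128-bit hash and also counts how often a description occurs before deciding to compile):

#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

/// Compile each unique description once; later calls reuse the cached result.
std::shared_ptr<std::string> compileIfNeeded(const std::string & functions_description)
{
    static std::map<std::string, std::shared_ptr<std::string>> cache;
    static std::mutex mutex;

    std::lock_guard lock(mutex);
    auto [it, inserted] = cache.emplace(functions_description, nullptr);
    if (inserted)
    {
        std::cout << "Compile expression " << functions_description << '\n';
        it->second = std::make_shared<std::string>("<compiled " + functions_description + ">");
    }
    return it->second;
}

int main()
{
    compileIfNeeded("sum(a), avg(b)"); /// compiles
    compileIfNeeded("sum(a), avg(b)"); /// cache hit, nothing to do
}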

View File

@ -1093,7 +1093,7 @@ private:
/** Try to compile aggregate functions.
*/
void compileAggregateFunctions();
void compileAggregateFunctionsIfNeeded();
/** Select the aggregation method based on the number and types of keys. */
AggregatedDataVariants::Type chooseAggregationMethod();

View File

@ -42,7 +42,8 @@
#include <Access/User.h>
#include <Access/Credentials.h>
#include <Access/SettingsProfile.h>
#include <Access/SettingsConstraints.h>
#include <Access/SettingsProfilesInfo.h>
#include <Access/SettingsConstraintsAndProfileIDs.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/GSSAcceptor.h>
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
@ -801,7 +802,9 @@ void Context::setUser(const Credentials & credentials, const Poco::Net::SocketAd
current_roles.clear();
use_default_roles = true;
setSettings(*access->getDefaultSettings());
auto default_profile_info = access->getDefaultProfileInfo();
settings_constraints_and_current_profiles = default_profile_info->getConstraintsAndProfileIDs();
applySettingsChanges(default_profile_info->settings);
}
void Context::setUser(const String & name, const String & password, const Poco::Net::SocketAddress & address)
@ -936,19 +939,41 @@ std::optional<QuotaUsage> Context::getQuotaUsage() const
}
void Context::setProfile(const String & profile_name)
void Context::setCurrentProfile(const String & profile_name)
{
SettingsChanges profile_settings_changes = *getAccessControlManager().getProfileSettings(profile_name);
auto lock = getLock();
try
{
checkSettingsConstraints(profile_settings_changes);
UUID profile_id = getAccessControlManager().getID<SettingsProfile>(profile_name);
setCurrentProfile(profile_id);
}
catch (Exception & e)
{
e.addMessage(", while trying to set settings profile {}", profile_name);
throw;
}
applySettingsChanges(profile_settings_changes);
}
void Context::setCurrentProfile(const UUID & profile_id)
{
auto lock = getLock();
auto profile_info = getAccessControlManager().getSettingsProfileInfo(profile_id);
checkSettingsConstraints(profile_info->settings);
applySettingsChanges(profile_info->settings);
settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles);
}
std::vector<UUID> Context::getCurrentProfiles() const
{
auto lock = getLock();
return settings_constraints_and_current_profiles->current_profiles;
}
std::vector<UUID> Context::getEnabledProfiles() const
{
auto lock = getLock();
return settings_constraints_and_current_profiles->enabled_profiles;
}
@ -1147,7 +1172,7 @@ void Context::setSetting(const StringRef & name, const String & value)
auto lock = getLock();
if (name == "profile")
{
setProfile(value);
setCurrentProfile(value);
return;
}
settings.set(std::string_view{name}, value);
@ -1162,7 +1187,7 @@ void Context::setSetting(const StringRef & name, const Field & value)
auto lock = getLock();
if (name == "profile")
{
setProfile(value.safeGet<String>());
setCurrentProfile(value.safeGet<String>());
return;
}
settings.set(std::string_view{name}, value);
@ -1198,27 +1223,31 @@ void Context::applySettingsChanges(const SettingsChanges & changes)
void Context::checkSettingsConstraints(const SettingChange & change) const
{
getSettingsConstraints()->check(settings, change);
getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change);
}
void Context::checkSettingsConstraints(const SettingsChanges & changes) const
{
getSettingsConstraints()->check(settings, changes);
getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes);
}
void Context::checkSettingsConstraints(SettingsChanges & changes) const
{
getSettingsConstraints()->check(settings, changes);
getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes);
}
void Context::clampToSettingsConstraints(SettingsChanges & changes) const
{
getSettingsConstraints()->clamp(settings, changes);
getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes);
}
std::shared_ptr<const SettingsConstraints> Context::getSettingsConstraints() const
std::shared_ptr<const SettingsConstraintsAndProfileIDs> Context::getSettingsConstraintsAndCurrentProfiles() const
{
return getAccess()->getSettingsConstraints();
auto lock = getLock();
if (settings_constraints_and_current_profiles)
return settings_constraints_and_current_profiles;
static auto no_constraints_or_profiles = std::make_shared<SettingsConstraintsAndProfileIDs>(getAccessControlManager());
return no_constraints_or_profiles;
}
@ -2409,13 +2438,13 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi
getAccessControlManager().setDefaultProfileName(shared->default_profile_name);
shared->system_profile_name = config.getString("system_profile", shared->default_profile_name);
setProfile(shared->system_profile_name);
setCurrentProfile(shared->system_profile_name);
applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks"));
shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name);
buffer_context = Context::createCopy(shared_from_this());
buffer_context->setProfile(shared->buffer_profile_name);
buffer_context->setCurrentProfile(shared->buffer_profile_name);
}
String Context::getDefaultProfileName() const

View File

@ -89,7 +89,7 @@ class ICompressionCodec;
class AccessControlManager;
class Credentials;
class GSSAcceptorContext;
class SettingsConstraints;
struct SettingsConstraintsAndProfileIDs;
class RemoteHostFilter;
struct StorageID;
class IDisk;
@ -177,6 +177,7 @@ private:
std::optional<UUID> user_id;
std::vector<UUID> current_roles;
bool use_default_roles = false;
std::shared_ptr<const SettingsConstraintsAndProfileIDs> settings_constraints_and_current_profiles;
std::shared_ptr<const ContextAccess> access;
std::shared_ptr<const EnabledRowPolicies> initial_row_policy;
String current_database;
@ -378,6 +379,11 @@ public:
boost::container::flat_set<UUID> getEnabledRoles() const;
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
void setCurrentProfile(const String & profile_name);
void setCurrentProfile(const UUID & profile_id);
std::vector<UUID> getCurrentProfiles() const;
std::vector<UUID> getEnabledProfiles() const;
/// Checks access rights.
/// Empty database means the current database.
void checkAccess(const AccessFlags & flags) const;
@ -516,7 +522,7 @@ public:
void clampToSettingsConstraints(SettingsChanges & changes) const;
/// Returns the current constraints (can return null).
std::shared_ptr<const SettingsConstraints> getSettingsConstraints() const;
std::shared_ptr<const SettingsConstraintsAndProfileIDs> getSettingsConstraintsAndCurrentProfiles() const;
const EmbeddedDictionaries & getEmbeddedDictionaries() const;
const ExternalDictionariesLoader & getExternalDictionariesLoader() const;
@ -810,8 +816,6 @@ private:
template <typename... Args>
void checkAccessImpl(const Args &... args) const;
void setProfile(const String & profile);
EmbeddedDictionaries & getEmbeddedDictionariesImpl(bool throw_on_error) const;
void checkCanBeDropped(const String & database, const String & table, const size_t & size, const size_t & max_size_to_drop) const;

View File

@ -28,6 +28,15 @@ namespace
const ASTGrantQuery & query,
const std::vector<UUID> & roles_to_grant_or_revoke)
{
if (!query.is_revoke)
{
if (query.replace_access)
grantee.access = {};
if (query.replace_granted_roles)
grantee.granted_roles = {};
}
if (!query.access_rights_elements.empty())
{
if (query.is_revoke)

View File

@ -4,8 +4,7 @@
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <DataStreams/CountingBlockOutputStream.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/copyData.h>
@ -351,9 +350,13 @@ BlockIO InterpreterInsertQuery::execute()
}
else if (query.data && !query.has_tail) /// can execute without additional data
{
// res.out = std::move(out_streams.at(0));
res.in = std::make_shared<InputStreamFromASTInsertQuery>(query_ptr, nullptr, query_sample_block, getContext(), nullptr);
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out_streams.at(0));
auto pipe = getSourceFromFromASTInsertQuery(query_ptr, nullptr, query_sample_block, getContext(), nullptr);
res.pipeline.init(std::move(pipe));
res.pipeline.resize(1);
res.pipeline.setSinks([&](const Block &, Pipe::StreamType)
{
return std::make_shared<SinkToOutputStream>(out_streams.at(0));
});
}
else
res.out = std::move(out_streams.at(0));

View File

@ -563,8 +563,10 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons
b.CreateRetVoid();
}
CompiledAggregateFunctions compileAggregateFunctons(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name)
CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name)
{
Stopwatch watch;
std::string create_aggregate_states_functions_name = functions_dump_name + "_create";
std::string add_aggregate_states_functions_name = functions_dump_name + "_add";
std::string merge_aggregate_states_functions_name = functions_dump_name + "_merge";
@ -588,6 +590,10 @@ CompiledAggregateFunctions compileAggregateFunctons(CHJIT & jit, const std::vect
assert(merge_aggregate_states_function);
assert(insert_aggregate_states_function);
ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, compiled_module.size);
ProfileEvents::increment(ProfileEvents::CompileFunction);
CompiledAggregateFunctions compiled_aggregate_functions
{
.create_aggregate_states_function = create_aggregate_states_function,

View File

@ -78,7 +78,7 @@ struct CompiledAggregateFunctions
* JITMergeAggregateStatesFunction will merge aggregate states for aggregate functions.
* JITInsertAggregateStatesIntoColumnsFunction will insert aggregate states for aggregate functions into result columns.
*/
CompiledAggregateFunctions compileAggregateFunctons(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name);
CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name);
}

View File

@ -3,7 +3,6 @@
#include <Columns/ColumnNullable.h>
#include <Core/NamesAndTypes.h>
#include <Core/SortCursor.h>
#include <DataStreams/BlocksListBlockInputStream.h>
#include <DataStreams/TemporaryFileStream.h>
#include <DataStreams/materializeBlock.h>
#include <DataTypes/DataTypeNullable.h>
@ -12,10 +11,10 @@
#include <Interpreters/TableJoin.h>
#include <Interpreters/join_common.h>
#include <Interpreters/sortBlock.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
namespace DB
@ -577,8 +576,7 @@ void MergeJoin::mergeInMemoryRightBlocks()
if (right_blocks.empty())
return;
auto stream = std::make_shared<BlocksListBlockInputStream>(std::move(right_blocks.blocks));
Pipe source(std::make_shared<SourceFromInputStream>(std::move(stream)));
Pipe source(std::make_shared<BlocksListSource>(std::move(right_blocks.blocks)));
right_blocks.clear();
QueryPipeline pipeline;

View File

@ -17,7 +17,7 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
#include <DataStreams/CheckSortedBlockInputStream.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
@ -901,12 +901,18 @@ BlockInputStreamPtr MutationsInterpreter::execute()
select_interpreter->buildQueryPlan(plan);
auto pipeline = addStreamsForLaterStages(stages, plan);
BlockInputStreamPtr result_stream = std::make_shared<PipelineExecutingBlockInputStream>(std::move(*pipeline));
/// Sometimes we update only a subset of the columns (for example, an UPDATE mutation);
/// in that case we don't read the sorting key, so we don't check anything.
if (auto sort_desc = getStorageSortDescriptionIfPossible(result_stream->getHeader()))
result_stream = std::make_shared<CheckSortedBlockInputStream>(result_stream, *sort_desc);
if (auto sort_desc = getStorageSortDescriptionIfPossible(pipeline->getHeader()))
{
pipeline->addSimpleTransform([&](const Block & header)
{
return std::make_shared<CheckSortedTransform>(header, *sort_desc);
});
}
BlockInputStreamPtr result_stream = std::make_shared<PipelineExecutingBlockInputStream>(std::move(*pipeline));
if (!updated_header)
updated_header = std::make_unique<Block>(result_stream->getHeader());

View File

@ -1,7 +1,9 @@
#include <Core/SortCursor.h>
#include <Interpreters/SortedBlocksWriter.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <DataStreams/TemporaryFileStream.h>
#include <DataStreams/materializeBlock.h>
#include <Disks/IVolume.h>
@ -10,40 +12,36 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_ENOUGH_SPACE;
}
namespace
{
std::unique_ptr<TemporaryFile> flushToFile(const String & tmp_path, const Block & header, IBlockInputStream & stream, const String & codec)
std::unique_ptr<TemporaryFile> flushToFile(const String & tmp_path, const Block & header, QueryPipeline pipeline, const String & codec)
{
auto tmp_file = createTemporaryFile(tmp_path);
std::atomic<bool> is_cancelled{false};
TemporaryFileStream::write(tmp_file->path(), header, stream, &is_cancelled, codec);
if (is_cancelled)
throw Exception("Cannot flush MergeJoin data on disk. No space at " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE);
TemporaryFileStream::write(tmp_file->path(), header, std::move(pipeline), codec);
return tmp_file;
}
SortedBlocksWriter::SortedFiles flushToManyFiles(const String & tmp_path, const Block & header, IBlockInputStream & stream,
SortedBlocksWriter::SortedFiles flushToManyFiles(const String & tmp_path, const Block & header, QueryPipeline pipeline,
const String & codec, std::function<void(const Block &)> callback = [](const Block &){})
{
std::vector<std::unique_ptr<TemporaryFile>> files;
PullingPipelineExecutor executor(pipeline);
while (Block block = stream.read())
Block block;
while (executor.pull(block))
{
if (!block.rows())
continue;
callback(block);
OneBlockInputStream block_stream(block);
auto tmp_file = flushToFile(tmp_path, header, block_stream, codec);
QueryPipeline one_block_pipeline;
Chunk chunk(block.getColumns(), block.rows());
one_block_pipeline.init(Pipe(std::make_shared<SourceFromSingleChunk>(block.cloneEmpty(), std::move(chunk))));
auto tmp_file = flushToFile(tmp_path, header, std::move(one_block_pipeline), codec);
files.emplace_back(std::move(tmp_file));
}
@ -119,23 +117,30 @@ SortedBlocksWriter::TmpFilePtr SortedBlocksWriter::flush(const BlocksList & bloc
{
const std::string path = getPath();
if (blocks.empty())
Pipes pipes;
pipes.reserve(blocks.size());
for (const auto & block : blocks)
if (auto num_rows = block.rows())
pipes.emplace_back(std::make_shared<SourceFromSingleChunk>(block.cloneEmpty(), Chunk(block.getColumns(), num_rows)));
if (pipes.empty())
return {};
if (blocks.size() == 1)
QueryPipeline pipeline;
pipeline.init(Pipe::unitePipes(std::move(pipes)));
if (pipeline.getNumStreams() > 1)
{
OneBlockInputStream sorted_input(blocks.front());
return flushToFile(path, sample_block, sorted_input, codec);
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
sort_description,
rows_in_block);
pipeline.addTransform(std::move(transform));
}
BlockInputStreams inputs;
inputs.reserve(blocks.size());
for (const auto & block : blocks)
if (block.rows())
inputs.push_back(std::make_shared<OneBlockInputStream>(block));
MergingSortedBlockInputStream sorted_input(inputs, sort_description, rows_in_block);
return flushToFile(path, sample_block, sorted_input, codec);
return flushToFile(path, sample_block, std::move(pipeline), codec);
}
SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge()
@ -158,8 +163,8 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge()
if (!blocks.empty())
files.emplace_back(flush(blocks));
BlockInputStreams inputs;
inputs.reserve(num_files_for_merge);
Pipes pipes;
pipes.reserve(num_files_for_merge);
/// Merge in parts to save memory. num_files_for_merge lets us trade disk I/O for memory.
{
@ -170,13 +175,26 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge()
{
for (const auto & file : files)
{
inputs.emplace_back(streamFromFile(file));
pipes.emplace_back(streamFromFile(file));
if (inputs.size() == num_files_for_merge || &file == &files.back())
if (pipes.size() == num_files_for_merge || &file == &files.back())
{
MergingSortedBlockInputStream sorted_input(inputs, sort_description, rows_in_block);
new_files.emplace_back(flushToFile(getPath(), sample_block, sorted_input, codec));
inputs.clear();
QueryPipeline pipeline;
pipeline.init(Pipe::unitePipes(std::move(pipes)));
pipes = Pipes();
if (pipeline.getNumStreams() > 1)
{
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
sort_description,
rows_in_block);
pipeline.addTransform(std::move(transform));
}
new_files.emplace_back(flushToFile(getPath(), sample_block, std::move(pipeline), codec));
}
}
@ -185,22 +203,35 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge()
}
for (const auto & file : files)
inputs.emplace_back(streamFromFile(file));
pipes.emplace_back(streamFromFile(file));
}
return PremergedFiles{std::move(files), std::move(inputs)};
return PremergedFiles{std::move(files), Pipe::unitePipes(std::move(pipes))};
}
SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function<void(const Block &)> callback)
{
PremergedFiles files = premerge();
MergingSortedBlockInputStream sorted_input(files.streams, sort_description, rows_in_block);
return flushToManyFiles(getPath(), sample_block, sorted_input, codec, callback);
QueryPipeline pipeline;
pipeline.init(std::move(files.pipe));
if (pipeline.getNumStreams() > 1)
{
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
sort_description,
rows_in_block);
pipeline.addTransform(std::move(transform));
}
return flushToManyFiles(getPath(), sample_block, std::move(pipeline), codec, callback);
}
BlockInputStreamPtr SortedBlocksWriter::streamFromFile(const TmpFilePtr & file) const
Pipe SortedBlocksWriter::streamFromFile(const TmpFilePtr & file) const
{
return std::make_shared<TemporaryFileLazyInputStream>(file->path(), materializeBlock(sample_block));
return Pipe(std::make_shared<TemporaryFileLazySource>(file->path(), materializeBlock(sample_block)));
}
String SortedBlocksWriter::getPath() const
@ -250,18 +281,35 @@ Block SortedBlocksBuffer::mergeBlocks(Blocks && blocks) const
size_t num_rows = 0;
{ /// Merge sort blocks
BlockInputStreams inputs;
inputs.reserve(blocks.size());
Pipes pipes;
pipes.reserve(blocks.size());
for (auto & block : blocks)
{
num_rows += block.rows();
inputs.emplace_back(std::make_shared<OneBlockInputStream>(block));
Chunk chunk(block.getColumns(), block.rows());
pipes.emplace_back(std::make_shared<SourceFromSingleChunk>(block.cloneEmpty(), std::move(chunk)));
}
Blocks tmp_blocks;
MergingSortedBlockInputStream stream(inputs, sort_description, num_rows);
while (const auto & block = stream.read())
QueryPipeline pipeline;
pipeline.init(Pipe::unitePipes(std::move(pipes)));
if (pipeline.getNumStreams() > 1)
{
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
sort_description,
num_rows);
pipeline.addTransform(std::move(transform));
}
PullingPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
tmp_blocks.emplace_back(block);
blocks.swap(tmp_blocks);
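The same four lines, unite the pipes and add a MergingSortedTransform only when there is more than one stream, appear in flush(), premerge(), finishMerge() and mergeBlocks() above. A hypothetical helper (not part of this commit) would factor the pattern out:

#include <memory>

#include <Core/SortDescription.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Merges/MergingSortedTransform.h>

namespace DB
{

/// Merge the pipeline's streams into one sorted stream, but only when
/// there is something to merge: a single stream is already sorted.
static void addMergingSortedIfNeeded(QueryPipeline & pipeline,
    const SortDescription & sort_description, size_t rows_in_block)
{
    if (pipeline.getNumStreams() <= 1)
        return;

    pipeline.addTransform(std::make_shared<MergingSortedTransform>(
        pipeline.getHeader(), pipeline.getNumStreams(), sort_description, rows_in_block));
}

}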

View File

@ -6,6 +6,7 @@
#include <Common/filesystemHelpers.h>
#include <Core/Block.h>
#include <Core/SortDescription.h>
#include <Processors/Pipe.h>
#include <DataStreams/SizeLimits.h>
#include <DataStreams/IBlockStream_fwd.h>
@ -17,6 +18,8 @@ class TableJoin;
class MergeJoinCursor;
struct MergeJoinEqualRange;
class Pipe;
class IVolume;
using VolumePtr = std::shared_ptr<IVolume>;
@ -56,7 +59,7 @@ struct SortedBlocksWriter
struct PremergedFiles
{
SortedFiles files;
BlockInputStreams streams;
Pipe pipe;
};
static constexpr const size_t num_streams = 2;
@ -94,7 +97,7 @@ struct SortedBlocksWriter
}
String getPath() const;
BlockInputStreamPtr streamFromFile(const TmpFilePtr & file) const;
Pipe streamFromFile(const TmpFilePtr & file) const;
void insert(Block && block);
TmpFilePtr flush(const BlocksList & blocks) const;

View File

@ -14,11 +14,10 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataStreams/NullBlockOutputStream.h>
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <Processors/NullSink.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
@ -168,48 +167,72 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context,
return getDistributedDDLStatus(node_path, entry, context);
}
class DDLQueryStatusSource final : public SourceWithProgress
{
public:
DDLQueryStatusSource(
const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait = {});
String getName() const override { return "DDLQueryStatus"; }
Chunk generate() override;
Status prepare() override;
private:
static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
std::pair<String, UInt16> parseHostAndPort(const String & host_id) const;
String node_path;
ContextPtr context;
Stopwatch watch;
Poco::Logger * log;
NameSet waiting_hosts; /// hosts from task host list
NameSet finished_hosts; /// finished hosts from host list
NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
Strings current_active_hosts; /// Hosts that were in active state at the last check
size_t num_hosts_finished = 0;
/// Save the first detected error and throw it at the end of execution
std::unique_ptr<Exception> first_exception;
Int64 timeout_seconds = 120;
bool by_hostname = true;
bool throw_on_timeout = true;
bool timeout_exceeded = false;
};
BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait)
{
BlockIO io;
if (context->getSettingsRef().distributed_ddl_task_timeout == 0)
return io;
BlockInputStreamPtr stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context, hosts_to_wait);
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
{
/// Wait for query to finish, but ignore output
auto null_output = std::make_shared<NullBlockOutputStream>(stream->getHeader());
stream = std::make_shared<NullAndDoCopyBlockInputStream>(std::move(stream), std::move(null_output));
}
ProcessorPtr processor = std::make_shared<DDLQueryStatusSource>(node_path, entry, context, hosts_to_wait);
io.pipeline.init(Pipe{processor});
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
io.pipeline.setSinks([](const Block & header, QueryPipeline::StreamType){ return std::make_shared<EmptySink>(header); });
io.in = std::move(stream);
return io;
}
DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_,
const std::optional<Strings> & hosts_to_wait)
: node_path(zk_node_path)
, context(context_)
, watch(CLOCK_MONOTONIC_COARSE)
, log(&Poco::Logger::get("DDLQueryStatusInputStream"))
static Block getSampleBlock(ContextPtr context_, bool hosts_to_wait)
{
if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::THROW ||
context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
throw_on_timeout = true;
else if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT ||
context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NEVER_THROW)
throw_on_timeout = false;
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown output mode");
auto output_mode = context_->getSettingsRef().distributed_ddl_output_mode;
auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr
{
if (throw_on_timeout)
if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE)
return type;
return std::make_shared<DataTypeNullable>(type);
};
sample = Block{
Block res = Block{
{std::make_shared<DataTypeString>(), "host"},
{std::make_shared<DataTypeUInt16>(), "port"},
{maybe_make_nullable(std::make_shared<DataTypeInt64>()), "status"},
@ -218,11 +241,27 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
{std::make_shared<DataTypeUInt64>(), "num_hosts_active"},
};
if (hosts_to_wait)
res.erase("port");
return res;
}
DDLQueryStatusSource::DDLQueryStatusSource(
const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait)
: SourceWithProgress(getSampleBlock(context_, hosts_to_wait.has_value()), true)
, node_path(zk_node_path)
, context(context_)
, watch(CLOCK_MONOTONIC_COARSE)
, log(&Poco::Logger::get("DDLQueryStatusInputStream"))
{
auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
if (hosts_to_wait)
{
waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
by_hostname = false;
sample.erase("port");
}
else
{
@ -231,11 +270,10 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
}
addTotalRowsApprox(waiting_hosts.size());
timeout_seconds = context->getSettingsRef().distributed_ddl_task_timeout;
}
std::pair<String, UInt16> DDLQueryStatusInputStream::parseHostAndPort(const String & host_id) const
std::pair<String, UInt16> DDLQueryStatusSource::parseHostAndPort(const String & host_id) const
{
String host = host_id;
UInt16 port = 0;
@ -248,37 +286,28 @@ std::pair<String, UInt16> DDLQueryStatusInputStream::parseHostAndPort(const Stri
return {host, port};
}
Block DDLQueryStatusInputStream::readImpl()
Chunk DDLQueryStatusSource::generate()
{
Block res;
bool all_hosts_finished = num_hosts_finished >= waiting_hosts.size();
/// Seems like num_hosts_finished cannot be strictly greater than waiting_hosts.size()
assert(num_hosts_finished <= waiting_hosts.size());
if (all_hosts_finished || timeout_exceeded)
{
bool throw_if_error_on_host = context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW;
if (first_exception && throw_if_error_on_host)
throw Exception(*first_exception);
return res;
}
if (all_hosts_finished || timeout_exceeded)
return {};
auto zookeeper = context->getZooKeeper();
size_t try_number = 0;
while (res.rows() == 0)
while (true)
{
if (isCancelled())
{
bool throw_if_error_on_host = context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW;
if (first_exception && throw_if_error_on_host)
throw Exception(*first_exception);
return res;
}
return {};
if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
{
timeout_exceeded = true;
size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
size_t num_active_hosts = current_active_hosts.size();
@ -286,10 +315,13 @@ Block DDLQueryStatusInputStream::readImpl()
"There are {} unfinished hosts ({} of them are currently active), "
"they are going to execute the query in background";
if (throw_on_timeout)
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
{
if (!first_exception)
first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
return {};
}
timeout_exceeded = true;
LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
NameSet unfinished_hosts = waiting_hosts;
@ -297,7 +329,7 @@ Block DDLQueryStatusInputStream::readImpl()
unfinished_hosts.erase(host_id);
/// The query is not finished on the remaining hosts, so fill the corresponding rows with NULLs.
MutableColumns columns = sample.cloneEmptyColumns();
MutableColumns columns = output.getHeader().cloneEmptyColumns();
for (const String & host_id : unfinished_hosts)
{
auto [host, port] = parseHostAndPort(host_id);
@ -310,8 +342,7 @@ Block DDLQueryStatusInputStream::readImpl()
columns[num++]->insert(num_unfinished_hosts);
columns[num++]->insert(num_active_hosts);
}
res = sample.cloneWithColumns(std::move(columns));
return res;
return Chunk(std::move(columns), unfinished_hosts.size());
}
if (num_hosts_finished != 0 || try_number != 0)
@ -321,9 +352,13 @@ Block DDLQueryStatusInputStream::readImpl()
if (!zookeeper->exists(node_path))
{
throw Exception(ErrorCodes::UNFINISHED,
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner since it was finished (or its lifetime is expired)",
node_path);
/// Paradoxically, this exception will be thrown even in the "never_throw" mode.
if (!first_exception)
first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
" since it was finished (or its lifetime is expired)", node_path);
return {};
}
Strings new_hosts = getNewAndUpdate(getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "finished"));
@ -333,7 +368,7 @@ Block DDLQueryStatusInputStream::readImpl()
current_active_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "active");
MutableColumns columns = sample.cloneEmptyColumns();
MutableColumns columns = output.getHeader().cloneEmptyColumns();
for (const String & host_id : new_hosts)
{
ExecutionStatus status(-1, "Cannot obtain error message");
@ -345,8 +380,11 @@ Block DDLQueryStatusInputStream::readImpl()
auto [host, port] = parseHostAndPort(host_id);
if (status.code != 0 && first_exception == nullptr)
if (status.code != 0 && !first_exception
&& context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
{
first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
}
++num_hosts_finished;
@ -359,13 +397,34 @@ Block DDLQueryStatusInputStream::readImpl()
columns[num++]->insert(waiting_hosts.size() - num_hosts_finished);
columns[num++]->insert(current_active_hosts.size());
}
res = sample.cloneWithColumns(std::move(columns));
}
return res;
return Chunk(std::move(columns), new_hosts.size());
}
}
Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
IProcessor::Status DDLQueryStatusSource::prepare()
{
/// This method is overridden to throw the exception after all data has been read.
/// The exception is pushed into the pipe (instead of simply being thrown) to preserve the order of data processing and the exception.
if (finished)
{
if (first_exception)
{
if (!output.canPush())
return Status::PortFull;
output.pushException(std::make_exception_ptr(*first_exception));
}
output.finish();
return Status::Finished;
}
else
return SourceWithProgress::prepare();
}
Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
{
Strings res;
Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
@ -374,7 +433,7 @@ Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<
return res;
}
Strings DDLQueryStatusInputStream::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
Strings DDLQueryStatusSource::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
{
Strings diff;
for (const String & host : current_list_of_finished_hosts)
@ -384,7 +443,7 @@ Strings DDLQueryStatusInputStream::getNewAndUpdate(const Strings & current_list_
if (!ignoring_hosts.count(host))
{
ignoring_hosts.emplace(host);
LOG_INFO(log, "Unexpected host {} appeared in task {}", host, node_path);
}
continue;
}


@ -1,6 +1,7 @@
#pragma once
#include <DataStreams/BlockIO.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST_fwd.h>
#include <DataStreams/IBlockInputStream.h>
@ -22,54 +23,12 @@ struct DDLLogEntry;
bool isSupportedAlterType(int type);
/// Pushes distributed DDL query to the queue.
/// Returns DDLQueryStatusInputStream, which reads results of query execution on each host in the cluster.
/// Returns DDLQueryStatusSource, which reads results of query execution on each host in the cluster.
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context, const AccessRightsElements & query_requires_access);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context, AccessRightsElements && query_requires_access);
BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait = {});
class DDLQueryStatusInputStream final : public IBlockInputStream
{
public:
DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait = {});
String getName() const override { return "DDLQueryStatusInputStream"; }
Block getHeader() const override { return sample; }
Block getSampleBlock() const { return sample.cloneEmpty(); }
Block readImpl() override;
private:
static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
std::pair<String, UInt16> parseHostAndPort(const String & host_id) const;
String node_path;
ContextPtr context;
Stopwatch watch;
Poco::Logger * log;
Block sample;
NameSet waiting_hosts; /// hosts from task host list
NameSet finished_hosts; /// finished hosts from host list
NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
Strings current_active_hosts; /// Hosts that were in active state at the last check
size_t num_hosts_finished = 0;
/// Save the first detected error and throw it at the end of execution
std::unique_ptr<Exception> first_exception;
Int64 timeout_seconds = 120;
bool by_hostname = true;
bool throw_on_timeout = true;
bool timeout_exceeded = false;
};
BlockIO getDistributedDDLStatus(
const String & node_path, const DDLLogEntry & entry, ContextPtr context, const std::optional<Strings> & hosts_to_wait = {});
}


@ -11,7 +11,7 @@
#include <DataStreams/BlockIO.h>
#include <DataStreams/copyData.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
#include <DataStreams/CountingBlockOutputStream.h>
#include <Parsers/ASTIdentifier.h>
@ -53,6 +53,7 @@
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/SinkToOutputStream.h>
namespace ProfileEvents
@ -512,9 +513,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
StoragePtr storage = context->executeTableFunction(input_function);
auto & input_storage = dynamic_cast<StorageInput &>(*storage);
auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr();
BlockInputStreamPtr input_stream = std::make_shared<InputStreamFromASTInsertQuery>(
auto pipe = getSourceFromFromASTInsertQuery(
ast, istr, input_metadata_snapshot->getSampleBlock(), context, input_function);
input_storage.setInputStream(input_stream);
input_storage.setPipe(std::move(pipe));
}
}
}
@ -992,8 +993,17 @@ void executeQuery(
{
if (streams.out)
{
InputStreamFromASTInsertQuery in(ast, &istr, streams.out->getHeader(), context, nullptr);
copyData(in, *streams.out);
auto pipe = getSourceFromFromASTInsertQuery(ast, &istr, streams.out->getHeader(), context, nullptr);
pipeline.init(std::move(pipe));
pipeline.resize(1);
pipeline.setSinks([&](const Block &, Pipe::StreamType)
{
return std::make_shared<SinkToOutputStream>(streams.out);
});
auto executor = pipeline.execute();
executor->execute(pipeline.getNumThreads());
}
else if (streams.in)
{


@ -102,7 +102,9 @@ ASTPtr ASTGrantQuery::clone() const
void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
{
settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (attach_mode ? "ATTACH " : "") << (is_revoke ? "REVOKE" : "GRANT")
settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (attach_mode ? "ATTACH " : "")
<< (settings.hilite ? hilite_keyword : "") << ((!is_revoke && (replace_access || replace_granted_roles)) ? "REPLACE " : "") << (settings.hilite ? hilite_none : "")
<< (settings.hilite ? hilite_keyword : "") << (is_revoke ? "REVOKE" : "GRANT")
<< (settings.hilite ? IAST::hilite_none : "");
if (!access_rights_elements.sameOptions())


@ -24,6 +24,8 @@ public:
AccessRightsElements access_rights_elements;
std::shared_ptr<ASTRolesOrUsersSet> roles;
bool admin_option = false;
bool replace_access = false;
bool replace_granted_roles = false;
std::shared_ptr<ASTRolesOrUsersSet> grantees;
String getID(char) const override;


@ -1555,26 +1555,37 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (pos->type != TokenType::StringLiteral)
if (pos->type != TokenType::StringLiteral && pos->type != TokenType::HereDoc)
return false;
String s;
ReadBufferFromMemory in(pos->begin, pos->size());
try
if (pos->type == TokenType::StringLiteral)
{
readQuotedStringWithSQLStyle(s, in);
}
catch (const Exception &)
{
expected.add(pos, "string literal");
return false;
}
ReadBufferFromMemory in(pos->begin, pos->size());
if (in.count() != pos->size())
try
{
readQuotedStringWithSQLStyle(s, in);
}
catch (const Exception &)
{
expected.add(pos, "string literal");
return false;
}
if (in.count() != pos->size())
{
expected.add(pos, "string literal");
return false;
}
}
else if (pos->type == TokenType::HereDoc)
{
expected.add(pos, "string literal");
return false;
std::string_view here_doc(pos->begin, pos->size());
size_t heredoc_name_end = here_doc.find('$', 1);
assert(heredoc_name_end != std::string_view::npos);
size_t heredoc_size = heredoc_name_end + 1;
s = String(pos->begin + heredoc_size, pos->size() - heredoc_size * 2);
}
auto literal = std::make_shared<ASTLiteral>(s);
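As an aside, the heredoc extraction above can be read as the following standalone sketch (hypothetical helper, not part of this diff; <string_view> assumed included):

/// Given a complete heredoc token such as "$tag$text$tag$", recover the
/// enclosed literal exactly as ParserStringLiteral does above.
static std::string_view heredocBody(std::string_view token)
{
    size_t tag_size = token.find('$', 1) + 1;                   /// length of "$tag$"
    return token.substr(tag_size, token.size() - 2 * tag_size); /// strip both tags
}
/// heredocBody("$doc$VALUES ('x')$doc$") yields "VALUES ('x')".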


@ -308,6 +308,7 @@ protected:
/** String in single quotes.
* A string in heredoc form $here$txt$here$ is equivalent to 'txt'.
*/
class ParserStringLiteral : public IParserBase
{


@ -2,7 +2,6 @@
#include <Common/StringUtils/StringUtils.h>
#include <common/find_symbols.h>
namespace DB
{
@ -338,10 +337,33 @@ Token Lexer::nextTokenImpl()
}
default:
if (*pos == '$' && ((pos + 1 < end && !isWordCharASCII(pos[1])) || pos + 1 == end))
if (*pos == '$')
{
/// Capture standalone dollar sign
return Token(TokenType::DollarSign, token_begin, ++pos);
/// Try to capture dollar sign as start of here doc
std::string_view token_stream(pos, end - pos);
auto heredoc_name_end_position = token_stream.find('$', 1);
if (heredoc_name_end_position != std::string::npos)
{
size_t heredoc_size = heredoc_name_end_position + 1;
std::string_view heredoc = {token_stream.data(), heredoc_size};
size_t heredoc_end_position = token_stream.find(heredoc, heredoc_size);
if (heredoc_end_position != std::string::npos)
{
pos += heredoc_end_position;
pos += heredoc_size;
return Token(TokenType::HereDoc, token_begin, pos);
}
}
if (((pos + 1 < end && !isWordCharASCII(pos[1])) || pos + 1 == end))
{
/// Capture standalone dollar sign
return Token(TokenType::DollarSign, token_begin, ++pos);
}
}
if (isWordCharASCII(*pos) || *pos == '$')
{


@ -33,6 +33,8 @@ namespace DB
\
M(Asterisk) /** Could be used as multiplication operator or on its own: "SELECT *" */ \
\
M(HereDoc) \
\
M(DollarSign) \
M(Plus) \
M(Minus) \


@ -231,6 +231,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (attach_mode && !ParserKeyword{"ATTACH"}.ignore(pos, expected))
return false;
bool is_replace = false;
bool is_revoke = false;
if (ParserKeyword{"REVOKE"}.ignore(pos, expected))
is_revoke = true;
@ -271,6 +272,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
grant_option = true;
else if (ParserKeyword{"WITH ADMIN OPTION"}.ignore(pos, expected))
admin_option = true;
if (ParserKeyword{"WITH REPLACE OPTION"}.ignore(pos, expected))
is_replace = true;
}
if (cluster.empty())
@ -287,6 +291,17 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
element.grant_option = true;
}
bool replace_access = false;
bool replace_role = false;
if (is_replace)
{
if (roles)
replace_role = true;
else
replace_access = true;
}
if (!is_revoke)
eraseNonGrantable(elements);
@ -300,6 +315,8 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->roles = std::move(roles);
query->grantees = std::move(grantees);
query->admin_option = admin_option;
query->replace_access = replace_access;
query->replace_granted_roles = replace_role;
return true;
}
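For reference, hedged examples of queries the extended grammar is intended to accept (illustrative only, not taken from this diff's tests):

/// GRANT SELECT ON db.* TO user1 WITH REPLACE OPTION;  -- sets replace_access
/// GRANT role1 TO user1 WITH REPLACE OPTION;           -- sets replace_granted_roles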


@ -72,12 +72,16 @@ public:
size_t getCurrentUnitNumber() const { return current_unit_number; }
void setCurrentUnitNumber(size_t current_unit_number_) { current_unit_number = current_unit_number_; }
void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
protected:
ColumnMappingPtr column_mapping{};
private:
/// Number of currently parsed chunk (if parallel parsing is enabled)
size_t current_unit_number = 0;
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
};
}


@ -394,7 +394,7 @@ public:
pushData({.chunk = std::move(chunk), .exception = {}});
}
void ALWAYS_INLINE push(std::exception_ptr exception)
void ALWAYS_INLINE pushException(std::exception_ptr exception)
{
pushData({.chunk = {}, .exception = std::move(exception)});
}


@ -7,6 +7,9 @@
namespace DB
{
/** Reads all data into a queue.
* After all data has been read, outputs it in the same order.
*/
class QueueBuffer : public IAccumulatingTransform
{
private:


@ -0,0 +1,47 @@
#pragma once
#include <Processors/Sources/SourceWithProgress.h>
namespace DB
{
/** A source that reads blocks one by one from an explicitly provided list.
* Also see OneBlockInputStream.
*/
class BlocksListSource : public SourceWithProgress
{
public:
/// Acquires the ownership of the block list.
explicit BlocksListSource(BlocksList && list_)
: SourceWithProgress(list_.empty() ? Block() : list_.front().cloneEmpty())
, list(std::move(list_)), it(list.begin()), end(list.end()) {}
/// Uses a list of blocks stored elsewhere.
BlocksListSource(BlocksList::iterator & begin_, BlocksList::iterator & end_)
: SourceWithProgress(begin_ == end_ ? Block() : begin_->cloneEmpty())
, it(begin_), end(end_) {}
String getName() const override { return "BlocksListSource"; }
protected:
Chunk generate() override
{
if (it == end)
return {};
Block res = *it;
++it;
size_t num_rows = res.rows();
return Chunk(res.getColumns(), num_rows);
}
private:
BlocksList list;
BlocksList::iterator it;
const BlocksList::iterator end;
};
}
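A minimal usage sketch for the new source, assuming the Pipe/QueryPipeline API used elsewhere in this diff (includes omitted):

/// Wrap an in-memory list of blocks into a source and pull them back in order.
BlocksList list; /// assume it was filled with blocks of identical structure
auto source = std::make_shared<BlocksListSource>(std::move(list));

QueryPipeline pipeline;
pipeline.init(Pipe(std::move(source)));

PullingPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
{
    /// blocks arrive in the original list order
}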


@ -2,7 +2,8 @@
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/inplaceBlockConversions.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
@ -128,31 +129,32 @@ static MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read,
}
AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(
const BlockInputStreamPtr & input,
AddingDefaultsTransform::AddingDefaultsTransform(
const Block & header,
const ColumnsDescription & columns_,
IInputFormat & input_format_,
ContextPtr context_)
: columns(columns_)
: ISimpleTransform(header, header, true)
, columns(columns_)
, column_defaults(columns.getDefaults())
, input_format(input_format_)
, context(context_)
{
children.push_back(input);
header = input->getHeader();
}
Block AddingDefaultsBlockInputStream::readImpl()
void AddingDefaultsTransform::transform(Chunk & chunk)
{
Block res = children.back()->read();
if (!res)
return res;
if (column_defaults.empty())
return res;
return;
const BlockMissingValues & block_missing_values = children.back()->getMissingValues();
const BlockMissingValues & block_missing_values = input_format.getMissingValues();
if (block_missing_values.empty())
return res;
return;
const auto & header = getOutputPort().getHeader();
size_t num_rows = chunk.getNumRows();
auto res = header.cloneWithColumns(chunk.detachColumns());
/// res block already has all columns values, with default value for type
/// (not value specified in table). We identify which columns we need to
@ -170,7 +172,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
}
if (!evaluate_block.columns())
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), num_rows), std::make_shared<DataTypeUInt8>(), "_dummy"});
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
if (dag)
@ -224,7 +226,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
res.setColumns(std::move(mutation));
}
return res;
chunk.setColumns(res.getColumns(), num_rows);
}
}


@ -1,31 +1,33 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Processors/ISimpleTransform.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
class IInputFormat;
/// Adds default values to columns using the BlockMissingValues bitmask obtained from the input format.
class AddingDefaultsBlockInputStream : public IBlockInputStream
class AddingDefaultsTransform : public ISimpleTransform
{
public:
AddingDefaultsBlockInputStream(
const BlockInputStreamPtr & input,
AddingDefaultsTransform(
const Block & header,
const ColumnsDescription & columns_,
IInputFormat & input_format_,
ContextPtr context_);
String getName() const override { return "AddingDefaults"; }
Block getHeader() const override { return header; }
String getName() const override { return "AddingDefaultsTransform"; }
protected:
Block readImpl() override;
void transform(Chunk & chunk) override;
private:
Block header;
const ColumnsDescription columns;
const ColumnDefaults column_defaults;
IInputFormat & input_format;
ContextPtr context;
};


@ -1,4 +1,4 @@
#include <DataStreams/CheckSortedBlockInputStream.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Common/FieldVisitorDump.h>
#include <Common/quoteString.h>
#include <Core/SortDescription.h>
@ -12,20 +12,20 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
CheckSortedBlockInputStream::CheckSortedBlockInputStream(
const BlockInputStreamPtr & input_,
CheckSortedTransform::CheckSortedTransform(
const Block & header_,
const SortDescription & sort_description_)
: header(input_->getHeader())
: ISimpleTransform(header_, header_, false)
, sort_description_map(addPositionsToSortDescriptions(sort_description_))
{
children.push_back(input_);
}
SortDescriptionsWithPositions
CheckSortedBlockInputStream::addPositionsToSortDescriptions(const SortDescription & sort_description)
CheckSortedTransform::addPositionsToSortDescriptions(const SortDescription & sort_description)
{
SortDescriptionsWithPositions result;
result.reserve(sort_description.size());
const auto & header = getInputPort().getHeader();
for (SortColumnDescription description_copy : sort_description)
{
@ -39,11 +39,11 @@ CheckSortedBlockInputStream::addPositionsToSortDescriptions(const SortDescriptio
}
Block CheckSortedBlockInputStream::readImpl()
void CheckSortedTransform::transform(Chunk & chunk)
{
Block block = children.back()->read();
if (!block || block.rows() == 0)
return block;
size_t num_rows = chunk.getNumRows();
if (num_rows == 0)
return;
auto check = [this](const Columns & left, size_t left_index, const Columns & right, size_t right_index)
{
@ -70,23 +70,20 @@ Block CheckSortedBlockInputStream::readImpl()
}
};
auto block_columns = block.getColumns();
const auto & chunk_columns = chunk.getColumns();
if (!last_row.empty())
check(last_row, 0, block_columns, 0);
check(last_row, 0, chunk_columns, 0);
size_t rows = block.rows();
for (size_t i = 1; i < rows; ++i)
check(block_columns, i - 1, block_columns, i);
for (size_t i = 1; i < num_rows; ++i)
check(chunk_columns, i - 1, chunk_columns, i);
last_row.clear();
for (size_t i = 0; i < block.columns(); ++i)
for (const auto & chunk_column : chunk_columns)
{
auto column = block_columns[i]->cloneEmpty();
column->insertFrom(*block_columns[i], rows - 1);
auto column = chunk_column->cloneEmpty();
column->insertFrom(*chunk_column, num_rows - 1);
last_row.emplace_back(std::move(column));
}
return block;
}
}


@ -1,5 +1,5 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Processors/ISimpleTransform.h>
#include <Core/SortDescription.h>
#include <Columns/IColumn.h>
@ -9,26 +9,23 @@ using SortDescriptionsWithPositions = std::vector<SortColumnDescription>;
/// Checks that the flow of chunks is sorted in the sort_description order.
/// Otherwise throws an exception in the transform function.
class CheckSortedBlockInputStream : public IBlockInputStream
class CheckSortedTransform : public ISimpleTransform
{
public:
CheckSortedBlockInputStream(
const BlockInputStreamPtr & input_,
CheckSortedTransform(
const Block & header_,
const SortDescription & sort_description_);
String getName() const override { return "CheckingSorted"; }
String getName() const override { return "CheckSortedTransform"; }
Block getHeader() const override { return header; }
protected:
Block readImpl() override;
void transform(Chunk & chunk) override;
private:
Block header;
SortDescriptionsWithPositions sort_description_map;
Columns last_row;
private:
/// Just checks, that all sort_descriptions has column_number
SortDescriptionsWithPositions addPositionsToSortDescriptions(const SortDescription & sort_description);
};


@ -197,6 +197,16 @@ WindowTransform::WindowTransform(const Block & input_header_,
, input_header(input_header_)
, window_description(window_description_)
{
// Materialize all columns in header, because we materialize all columns
// in chunks and it's convenient if they match.
auto input_columns = input_header.getColumns();
for (auto & column : input_columns)
{
column = std::move(column)->convertToFullColumnIfConst();
}
input_header.setColumns(std::move(input_columns));
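// Illustrative example (not part of the diff): a column created as
// ColumnConst::create(ColumnUInt8::create(1, 42), 100) becomes, after
// convertToFullColumnIfConst(), a plain ColumnUInt8 holding the value 42
// repeated 100 times.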
// Initialize window function workspaces.
workspaces.reserve(functions.size());
for (const auto & f : functions)
{
@ -851,6 +861,8 @@ void WindowTransform::updateAggregationState()
assert(prev_frame_start <= prev_frame_end);
assert(prev_frame_start <= frame_start);
assert(prev_frame_end <= frame_end);
assert(partition_start <= frame_start);
assert(frame_end <= partition_end);
// We might have to reset aggregation state and/or add some rows to it.
// Figure out what to do.
@ -1044,13 +1056,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
block.output_columns.back()->reserve(block.rows);
}
// As a debugging aid, assert that chunk have the same C++ type of
// columns, because we often have to work across chunks.
if (blocks.size() > 1)
{
assertSameColumns(blocks.front().input_columns,
blocks.back().input_columns);
}
// As a debugging aid, assert that all chunks have the same C++ type of
// columns, that also matches the input header, because we often have to
// work across chunks.
assertSameColumns(input_header.getColumns(), block.input_columns);
}
// Start the calculations. First, advance the partition end.


@ -1,13 +1,16 @@
#include <Parsers/ASTInsertQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Formats/FormatFactory.h>
#include <IO/ConcatReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Processors/Transforms/getSourceFromFromASTInsertQuery.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage.h>
#include <Processors/Pipe.h>
#include <Processors/Formats/IInputFormat.h>
namespace DB
@ -20,7 +23,7 @@ namespace ErrorCodes
}
InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
Pipe getSourceFromFromASTInsertQuery(
const ASTPtr & ast,
ReadBuffer * input_buffer_tail_part,
const Block & header,
@ -42,7 +45,7 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
/// Data can be in the already parsed (ast_insert_query.data) and in the not yet parsed (input_buffer_tail_part) part of the query.
input_buffer_ast_part = std::make_unique<ReadBufferFromMemory>(
auto input_buffer_ast_part = std::make_unique<ReadBufferFromMemory>(
ast_insert_query->data, ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0);
ConcatReadBuffer::ReadBuffers buffers;
@ -56,9 +59,10 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
* - because 'query.data' could refer to memory piece, used as buffer for 'input_buffer_tail_part'.
*/
input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
auto input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
res_stream = context->getInputFormat(format, *input_buffer_contacenated, header, context->getSettings().max_insert_block_size);
auto source = FormatFactory::instance().getInput(format, *input_buffer_contacenated, header, context, context->getSettings().max_insert_block_size);
Pipe pipe(source);
if (context->getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function)
{
@ -66,8 +70,18 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
if (columns.hasDefaults())
res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, columns, context);
{
pipe.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *source, context);
});
}
}
source->addBuffer(std::move(input_buffer_ast_part));
source->addBuffer(std::move(input_buffer_contacenated));
return pipe;
}
}


@ -0,0 +1,26 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/Context_fwd.h>
#include <cstddef>
#include <memory>
namespace DB
{
/** Prepares a pipe that produces the data contained in an INSERT query.
* The head of the inserted data can be stored directly in the INSERT AST;
* the remaining (tail) data can be stored in input_buffer_tail_part.
*/
class Pipe;
Pipe getSourceFromFromASTInsertQuery(
const ASTPtr & ast,
ReadBuffer * input_buffer_tail_part,
const Block & header,
ContextPtr context,
const ASTPtr & input_function);
}
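A minimal usage sketch mirroring the call sites changed elsewhere in this diff (the names ast, tail, header and context are assumptions):

/// Turn the inline data of an "INSERT ... FORMAT ..." query into a pipe of blocks.
Pipe pipe = getSourceFromFromASTInsertQuery(ast, &tail, header, context, nullptr);

QueryPipeline pipeline;
pipeline.init(std::move(pipe));

PullingPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
{
    /// parsed INSERT data; defaults are added when the settings request it
}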


@ -139,10 +139,12 @@ SRCS(
Sources/SinkToOutputStream.cpp
Sources/SourceFromInputStream.cpp
Sources/SourceWithProgress.cpp
Transforms/AddingDefaultsTransform.cpp
Transforms/AddingSelectorTransform.cpp
Transforms/AggregatingInOrderTransform.cpp
Transforms/AggregatingTransform.cpp
Transforms/ArrayJoinTransform.cpp
Transforms/CheckSortedTransform.cpp
Transforms/CopyTransform.cpp
Transforms/CreatingSetsTransform.cpp
Transforms/CubeTransform.cpp
@ -165,6 +167,7 @@ SRCS(
Transforms/SortingTransform.cpp
Transforms/TotalsHavingTransform.cpp
Transforms/WindowTransform.cpp
Transforms/getSourceFromFromASTInsertQuery.cpp
printPipeline.cpp
)


@ -5,7 +5,7 @@
#include <Columns/ColumnsNumber.h>
#include <Common/CurrentThread.h>
#include <Common/SettingsChanges.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataTypes/DataTypeFactory.h>
#include <Interpreters/Context.h>
@ -20,6 +20,10 @@
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ParserQuery.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/QueryPipeline.h>
#include <Formats/FormatFactory.h>
#include <Server/IServer.h>
#include <Storages/IStorage.h>
#include <Poco/FileStream.h>
@ -547,7 +551,8 @@ namespace
std::optional<ReadBufferFromCallback> read_buffer;
std::optional<WriteBufferFromString> write_buffer;
BlockInputStreamPtr block_input_stream;
std::unique_ptr<QueryPipeline> pipeline;
std::unique_ptr<PullingPipelineExecutor> pipeline_executor;
BlockOutputStreamPtr block_output_stream;
bool need_input_data_from_insert_query = true;
bool need_input_data_from_query_info = true;
@ -755,16 +760,16 @@ namespace
throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR);
input_function_is_used = true;
initializeBlockInputStream(input_storage->getInMemoryMetadataPtr()->getSampleBlock());
block_input_stream->readPrefix();
});
query_context->setInputBlocksReaderCallback([this](ContextPtr context) -> Block
{
if (context != query_context)
throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR);
auto block = block_input_stream->read();
if (!block)
block_input_stream->readSuffix();
Block block;
while (!block && pipeline_executor->pull(block));
return block;
});
@ -797,13 +802,15 @@ namespace
/// So we mustn't touch the input stream from another thread.
initializeBlockInputStream(io.out->getHeader());
block_input_stream->readPrefix();
io.out->writePrefix();
while (auto block = block_input_stream->read())
io.out->write(block);
Block block;
while (pipeline_executor->pull(block))
{
if (block)
io.out->write(block);
}
block_input_stream->readSuffix();
io.out->writeSuffix();
}
@ -866,9 +873,11 @@ namespace
return {nullptr, 0}; /// no more input data
});
assert(!block_input_stream);
block_input_stream = query_context->getInputFormat(
input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size);
assert(!pipeline);
pipeline = std::make_unique<QueryPipeline>();
auto source = FormatFactory::instance().getInput(
input_format, *read_buffer, header, query_context, query_context->getSettings().max_insert_block_size);
pipeline->init(Pipe(source));
/// Add default values if necessary.
if (ast)
@ -881,10 +890,17 @@ namespace
StoragePtr storage = DatabaseCatalog::instance().getTable(table_id, query_context);
const auto & columns = storage->getInMemoryMetadataPtr()->getColumns();
if (!columns.empty())
block_input_stream = std::make_shared<AddingDefaultsBlockInputStream>(block_input_stream, columns, query_context);
{
pipeline->addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *source, query_context);
});
}
}
}
}
pipeline_executor = std::make_unique<PullingPipelineExecutor>(*pipeline);
}
void Call::createExternalTables()
@ -1196,7 +1212,8 @@ namespace
void Call::close()
{
responder.reset();
block_input_stream.reset();
pipeline_executor.reset();
pipeline.reset();
block_output_stream.reset();
read_buffer.reset();
write_buffer.reset();


@ -506,12 +506,15 @@ bool ColumnsDescription::hasColumnOrSubcolumn(GetFlags flags, const String & col
void ColumnsDescription::addSubcolumnsToList(NamesAndTypesList & source_list) const
{
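/// Collect subcolumns into a separate list first: inserting into source_list
/// while iterating over it would also visit the newly inserted elements.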
NamesAndTypesList subcolumns_list;
for (const auto & col : source_list)
{
auto range = subcolumns.get<1>().equal_range(col.name);
if (range.first != range.second)
source_list.insert(source_list.end(), range.first, range.second);
subcolumns_list.insert(subcolumns_list.end(), range.first, range.second);
}
source_list.splice(source_list.end(), std::move(subcolumns_list));
}
NamesAndTypesList ColumnsDescription::getAllWithSubcolumns() const


@ -2,6 +2,7 @@
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/ConvertingBlockInputStream.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Common/escapeForFileName.h>
#include <Common/CurrentMetrics.h>
#include <Common/StringUtils/StringUtils.h>
@ -902,50 +903,78 @@ private:
}
};
class DirectoryMonitorBlockInputStream : public IBlockInputStream
class DirectoryMonitorSource : public SourceWithProgress
{
public:
explicit DirectoryMonitorBlockInputStream(const String & file_name)
: in(file_name)
, decompressing_in(in)
, block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION)
, log{&Poco::Logger::get("DirectoryMonitorBlockInputStream")}
{
readDistributedHeader(in, log);
block_in.readPrefix();
first_block = block_in.read();
header = first_block.cloneEmpty();
struct Data
{
std::unique_ptr<ReadBufferFromFile> in;
std::unique_ptr<CompressedReadBuffer> decompressing_in;
std::unique_ptr<NativeBlockInputStream> block_in;
Poco::Logger * log = nullptr;
Block first_block;
explicit Data(const String & file_name)
{
in = std::make_unique<ReadBufferFromFile>(file_name);
decompressing_in = std::make_unique<CompressedReadBuffer>(*in);
block_in = std::make_unique<NativeBlockInputStream>(*decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
log = &Poco::Logger::get("DirectoryMonitorSource");
readDistributedHeader(*in, log);
block_in->readPrefix();
first_block = block_in->read();
}
Data(Data &&) = default;
};
explicit DirectoryMonitorSource(const String & file_name)
: DirectoryMonitorSource(Data(file_name))
{
}
String getName() const override { return "DirectoryMonitor"; }
explicit DirectoryMonitorSource(Data data_)
: SourceWithProgress(data_.first_block.cloneEmpty())
, data(std::move(data_))
{
}
String getName() const override { return "DirectoryMonitorSource"; }
protected:
Block getHeader() const override { return header; }
Block readImpl() override
Chunk generate() override
{
if (first_block)
return std::move(first_block);
if (data.first_block)
{
size_t num_rows = data.first_block.rows();
Chunk res(data.first_block.getColumns(), num_rows);
data.first_block.clear();
return res;
}
return block_in.read();
auto block = data.block_in->read();
if (!block)
{
data.block_in->readSuffix();
return {};
}
size_t num_rows = block.rows();
return Chunk(block.getColumns(), num_rows);
}
void readSuffix() override { block_in.readSuffix(); }
private:
ReadBufferFromFile in;
CompressedReadBuffer decompressing_in;
NativeBlockInputStream block_in;
Block first_block;
Block header;
Poco::Logger * log;
Data data;
};
BlockInputStreamPtr StorageDistributedDirectoryMonitor::createStreamFromFile(const String & file_name)
ProcessorPtr StorageDistributedDirectoryMonitor::createSourceFromFile(const String & file_name)
{
return std::make_shared<DirectoryMonitorBlockInputStream>(file_name);
return std::make_shared<DirectoryMonitorSource>(file_name);
}
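A minimal sketch of replaying one on-disk batch through the new source, assuming the Pipe API shown above (file_name as in the function):

/// Read back the blocks stored in a batch file written by the monitor.
auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(file_name);

QueryPipeline pipeline;
pipeline.init(Pipe(std::move(source)));

PullingPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
{
    /// blocks decoded from the batch file
}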
bool StorageDistributedDirectoryMonitor::addAndSchedule(size_t file_size, size_t ms)

Some files were not shown because too many files have changed in this diff Show More