Merge remote-tracking branch 'origin/master' into distinct-in-order-sqlancer-crashes

Commit 1ead0d7dac
@@ -67,6 +67,8 @@ public:
    Message(
        const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
    Message(
        std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
        /// Creates a Message with the given source, text, priority,
        /// source file path and line.
        ///
@@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
}


Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
    _source(std::move(source)),
    _text(std::move(text)),
    _prio(prio),
    _tid(0),
    _file(file),
    _line(line),
    _pMap(0),
    _fmt_str(fmt_str)
{
    init();
}


Message::Message(const Message& msg):
    _source(msg._source),
    _text(msg._text),
@@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test

# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib

install_packages package_folder

@@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &

start

shellcheck disable=SC2086 # No quotes because I want to split it into words.
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
@@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre

# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib

azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &

@@ -61,6 +62,7 @@ configure

# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start

@@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau

# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start
@@ -65,6 +65,40 @@ XML substitution example:

Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element.

## Encrypting Configuration {#encryption}

You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encryption_codec` with the name of the encryption codec as its value to the element to encrypt.

Unlike the attributes `from_zk`, `from_env`, and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.

Example:

```xml
<clickhouse>
    <encryption_codecs>
        <aes_128_gcm_siv>
            <key_hex>00112233445566778899aabbccddeeff</key_hex>
        </aes_128_gcm_siv>
    </encryption_codecs>
    <interserver_http_credentials>
        <user>admin</user>
        <password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
    </interserver_http_credentials>
</clickhouse>
```

To get the encrypted value, the `encrypt_decrypt` example application may be used.

Example:

``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```

``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```

## User Settings {#user-settings}

The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
@@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC

Syntax:

```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```

**Example**
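For illustration only, a minimal statement consistent with the syntax above (the table, column, and property names are hypothetical):

```sql
ALTER TABLE visits MODIFY COLUMN browser REMOVE TTL;
```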
@@ -414,3 +414,29 @@ Will do sync syscall.

```sql
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```

### SYSTEM STOP LISTEN

Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.

However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.

```sql
SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```

- If the `CUSTOM 'protocol'` modifier is specified, the custom protocol with that name, defined in the protocols section of the server configuration, will be stopped.
- If the `QUERIES ALL` modifier is specified, all protocols are stopped.
- If the `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
- If the `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.

### SYSTEM START LISTEN

Allows new connections to be established on the specified protocols.

However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect.

```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
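For illustration, a short usage sketch consistent with the grammar above (the choice of protocol is arbitrary):

```sql
-- Stop accepting new MySQL-protocol connections, then resume them later.
SYSTEM STOP LISTEN MYSQL;
SYSTEM START LISTEN MYSQL;
```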
@@ -85,6 +85,40 @@ $ cat /etc/clickhouse-server/users.d/alice.xml

The server tracks changes to configuration files, as well as to the files and ZooKeeper nodes that were used for substitutions and overrides, and reloads user and cluster settings on the fly. This means clusters, users, and their settings can be changed without restarting the server.

## Encryption {#encryption}

You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encryption_codec` with the name of the encryption codec as its value to the element to encrypt.

Unlike the attributes `from_zk`, `from_env`, and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.

Example:

```xml
<clickhouse>
    <encryption_codecs>
        <aes_128_gcm_siv>
            <key_hex>00112233445566778899aabbccddeeff</key_hex>
        </aes_128_gcm_siv>
    </encryption_codecs>
    <interserver_http_credentials>
        <user>admin</user>
        <password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
    </interserver_http_credentials>
</clickhouse>
```

To get the encrypted value, the `encrypt_decrypt` example application may be used.

Example:

``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```

``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```

## Examples of YAML configuration {#example}

An example of a real configuration written in YAML can be found here: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
@@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)

Syntax:

```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```

**Example**
@@ -65,6 +65,7 @@ if (BUILD_STANDALONE_KEEPER)
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp

@@ -80,6 +81,7 @@ if (BUILD_STANDALONE_KEEPER)
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
@@ -747,6 +747,7 @@ try

    std::lock_guard lock(servers_lock);
    metrics.reserve(servers_to_start_before_tables.size() + servers.size());

    for (const auto & server : servers_to_start_before_tables)
        metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});

@@ -1456,6 +1457,24 @@ try
        access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
    });

    global_context->setStopServersCallback([&](const ServerType & server_type)
    {
        stopServers(servers, server_type);
    });

    global_context->setStartServersCallback([&](const ServerType & server_type)
    {
        createServers(
            config(),
            listen_hosts,
            listen_try,
            server_pool,
            async_metrics,
            servers,
            /* start_servers= */ true,
            server_type);
    });

    /// Limit on total number of concurrently executed queries.
    global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
@@ -1997,7 +2016,8 @@ void Server::createServers(
    Poco::ThreadPool & server_pool,
    AsynchronousMetrics & async_metrics,
    std::vector<ProtocolServerAdapter> & servers,
    bool start_servers)
    bool start_servers,
    const ServerType & server_type)
{
    const Settings & settings = global_context->getSettingsRef();

@@ -2011,6 +2031,9 @@ void Server::createServers(

    for (const auto & protocol : protocols)
    {
        if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
            continue;

        std::vector<std::string> hosts;
        if (config.has("protocols." + protocol + ".host"))
            hosts.push_back(config.getString("protocols." + protocol + ".host"));
@@ -2057,162 +2080,190 @@ void Server::createServers(

    for (const auto & listen_host : listen_hosts)
    {
        /// HTTP
        const char * port_name = "http_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port);
            socket.setReceiveTimeout(settings.http_receive_timeout);
            socket.setSendTimeout(settings.http_send_timeout);
        const char * port_name;

            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "http://" + address.toString(),
                std::make_unique<HTTPServer>(
                    httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
        });

        /// HTTPS
        port_name = "https_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::HTTP))
        {
            /// HTTP
            port_name = "http_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port);
                socket.setReceiveTimeout(settings.http_receive_timeout);
                socket.setSendTimeout(settings.http_send_timeout);

                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "http://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
            });
        }

        if (server_type.shouldStart(ServerType::Type::HTTPS))
        {
            /// HTTPS
            port_name = "https_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
#if USE_SSL
            Poco::Net::SecureServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
            socket.setReceiveTimeout(settings.http_receive_timeout);
            socket.setSendTimeout(settings.http_send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "https://" + address.toString(),
                std::make_unique<HTTPServer>(
                    httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
                Poco::Net::SecureServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
                socket.setReceiveTimeout(settings.http_receive_timeout);
                socket.setSendTimeout(settings.http_send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "https://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
#else
            UNUSED(port);
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
                UNUSED(port);
                throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
#endif
        });
            });
        }

        /// TCP
        port_name = "tcp_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::TCP))
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port);
            socket.setReceiveTimeout(settings.receive_timeout);
            socket.setSendTimeout(settings.send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "native protocol (tcp): " + address.toString(),
                std::make_unique<TCPServer>(
                    new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
                    server_pool,
                    socket,
                    new Poco::Net::TCPServerParams));
        });
            /// TCP
            port_name = "tcp_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port);
                socket.setReceiveTimeout(settings.receive_timeout);
                socket.setSendTimeout(settings.send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "native protocol (tcp): " + address.toString(),
                    std::make_unique<TCPServer>(
                        new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
                        server_pool,
                        socket,
                        new Poco::Net::TCPServerParams));
            });
        }

        /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
        port_name = "tcp_with_proxy_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY))
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port);
            socket.setReceiveTimeout(settings.receive_timeout);
            socket.setSendTimeout(settings.send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "native protocol (tcp) with PROXY: " + address.toString(),
                std::make_unique<TCPServer>(
                    new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
                    server_pool,
                    socket,
                    new Poco::Net::TCPServerParams));
        });
            /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
            port_name = "tcp_with_proxy_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port);
                socket.setReceiveTimeout(settings.receive_timeout);
                socket.setSendTimeout(settings.send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "native protocol (tcp) with PROXY: " + address.toString(),
                    std::make_unique<TCPServer>(
                        new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
                        server_pool,
                        socket,
                        new Poco::Net::TCPServerParams));
            });
        }

        /// TCP with SSL
        port_name = "tcp_port_secure";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::TCP_SECURE))
        {
#if USE_SSL
            Poco::Net::SecureServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
            socket.setReceiveTimeout(settings.receive_timeout);
            socket.setSendTimeout(settings.send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "secure native protocol (tcp_secure): " + address.toString(),
                std::make_unique<TCPServer>(
                    new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
                    server_pool,
                    socket,
                    new Poco::Net::TCPServerParams));
#else
            UNUSED(port);
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
        });
            /// TCP with SSL
            port_name = "tcp_port_secure";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
#if USE_SSL
                Poco::Net::SecureServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
                socket.setReceiveTimeout(settings.receive_timeout);
                socket.setSendTimeout(settings.send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "secure native protocol (tcp_secure): " + address.toString(),
                    std::make_unique<TCPServer>(
                        new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
                        server_pool,
                        socket,
                        new Poco::Net::TCPServerParams));
#else
                UNUSED(port);
                throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
            });
        }

        port_name = "mysql_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::MYSQL))
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
            socket.setReceiveTimeout(Poco::Timespan());
            socket.setSendTimeout(settings.send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "MySQL compatibility protocol: " + address.toString(),
                std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
        });
            port_name = "mysql_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
                socket.setReceiveTimeout(Poco::Timespan());
                socket.setSendTimeout(settings.send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "MySQL compatibility protocol: " + address.toString(),
                    std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
            });
        }

        port_name = "postgresql_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::POSTGRESQL))
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
            socket.setReceiveTimeout(Poco::Timespan());
            socket.setSendTimeout(settings.send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "PostgreSQL compatibility protocol: " + address.toString(),
                std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
        });
            port_name = "postgresql_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
                socket.setReceiveTimeout(Poco::Timespan());
                socket.setSendTimeout(settings.send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "PostgreSQL compatibility protocol: " + address.toString(),
                    std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
            });
        }

#if USE_GRPC
        port_name = "grpc_port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::GRPC))
        {
            Poco::Net::SocketAddress server_address(listen_host, port);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "gRPC protocol: " + server_address.toString(),
                std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
        });
            port_name = "grpc_port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::SocketAddress server_address(listen_host, port);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "gRPC protocol: " + server_address.toString(),
                    std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
            });
        }
#endif

        /// Prometheus (if defined and not setup yet with http_port)
        port_name = "prometheus.port";
        createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::PROMETHEUS))
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, listen_host, port);
            socket.setReceiveTimeout(settings.http_receive_timeout);
            socket.setSendTimeout(settings.http_send_timeout);
            return ProtocolServerAdapter(
                listen_host,
                port_name,
                "Prometheus: http://" + address.toString(),
                std::make_unique<HTTPServer>(
                    httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
        });
            /// Prometheus (if defined and not setup yet with http_port)
            port_name = "prometheus.port";
            createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, listen_host, port);
                socket.setReceiveTimeout(settings.http_receive_timeout);
                socket.setSendTimeout(settings.http_send_timeout);
                return ProtocolServerAdapter(
                    listen_host,
                    port_name,
                    "Prometheus: http://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
            });
        }
    }
}
@@ -2223,7 +2274,8 @@ void Server::createInterserverServers(
    Poco::ThreadPool & server_pool,
    AsynchronousMetrics & async_metrics,
    std::vector<ProtocolServerAdapter> & servers,
    bool start_servers)
    bool start_servers,
    const ServerType & server_type)
{
    const Settings & settings = global_context->getSettingsRef();

@@ -2235,52 +2287,97 @@ void Server::createInterserverServers(
    /// Now iterate over interserver_listen_hosts
    for (const auto & interserver_listen_host : interserver_listen_hosts)
    {
        /// Interserver IO HTTP
        const char * port_name = "interserver_http_port";
        createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        {
            Poco::Net::ServerSocket socket;
            auto address = socketBindListen(config, socket, interserver_listen_host, port);
            socket.setReceiveTimeout(settings.http_receive_timeout);
            socket.setSendTimeout(settings.http_send_timeout);
            return ProtocolServerAdapter(
                interserver_listen_host,
                port_name,
                "replica communication (interserver): http://" + address.toString(),
                std::make_unique<HTTPServer>(
                    httpContext(),
                    createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
                    server_pool,
                    socket,
                    http_params));
        });
        const char * port_name;

        port_name = "interserver_https_port";
        createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
        if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP))
        {
            /// Interserver IO HTTP
            port_name = "interserver_http_port";
            createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
                Poco::Net::ServerSocket socket;
                auto address = socketBindListen(config, socket, interserver_listen_host, port);
                socket.setReceiveTimeout(settings.http_receive_timeout);
                socket.setSendTimeout(settings.http_send_timeout);
                return ProtocolServerAdapter(
                    interserver_listen_host,
                    port_name,
                    "replica communication (interserver): http://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        httpContext(),
                        createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
                        server_pool,
                        socket,
                        http_params));
            });
        }

        if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS))
        {
            port_name = "interserver_https_port";
            createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
            {
#if USE_SSL
            Poco::Net::SecureServerSocket socket;
            auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
            socket.setReceiveTimeout(settings.http_receive_timeout);
            socket.setSendTimeout(settings.http_send_timeout);
            return ProtocolServerAdapter(
                interserver_listen_host,
                port_name,
                "secure replica communication (interserver): https://" + address.toString(),
                std::make_unique<HTTPServer>(
                    httpContext(),
                    createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
                    server_pool,
                    socket,
                    http_params));
                Poco::Net::SecureServerSocket socket;
                auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
                socket.setReceiveTimeout(settings.http_receive_timeout);
                socket.setSendTimeout(settings.http_send_timeout);
                return ProtocolServerAdapter(
                    interserver_listen_host,
                    port_name,
                    "secure replica communication (interserver): https://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        httpContext(),
                        createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
                        server_pool,
                        socket,
                        http_params));
#else
            UNUSED(port);
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
                UNUSED(port);
                throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
        });
            });
        }
    }
}

void Server::stopServers(
    std::vector<ProtocolServerAdapter> & servers,
    const ServerType & server_type
) const
{
    Poco::Logger * log = &logger();

    /// Remove servers once all their connections are closed
    auto check_server = [&log](const char prefix[], auto & server)
    {
        if (!server.isStopping())
            return false;
        size_t current_connections = server.currentConnections();
        LOG_DEBUG(log, "Server {}{}: {} ({} connections)",
            server.getDescription(),
            prefix,
            !current_connections ? "finished" : "waiting",
            current_connections);
        return !current_connections;
    };

    std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)"));

    for (auto & server : servers)
    {
        if (!server.isStopping())
        {
            const std::string server_port_name = server.getPortName();

            if (server_type.shouldStop(server_port_name))
                server.stop();
        }
    }

    std::erase_if(servers, std::bind_front(check_server, ""));
}

void Server::updateServers(
    Poco::Util::AbstractConfiguration & config,
    Poco::ThreadPool & server_pool,
@@ -3,8 +3,9 @@
#include <Server/IServer.h>

#include <Daemon/BaseDaemon.h>
#include "Server/HTTP/HTTPContext.h"
#include <Server/HTTP/HTTPContext.h>
#include <Server/TCPProtocolStackFactory.h>
#include <Server/ServerType.h>
#include <Poco/Net/HTTPServerParams.h>

/** Server provides three interfaces:

@@ -106,7 +107,8 @@ private:
        Poco::ThreadPool & server_pool,
        AsynchronousMetrics & async_metrics,
        std::vector<ProtocolServerAdapter> & servers,
        bool start_servers = false);
        bool start_servers = false,
        const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));

    void createInterserverServers(
        Poco::Util::AbstractConfiguration & config,

@@ -115,7 +117,8 @@ private:
        Poco::ThreadPool & server_pool,
        AsynchronousMetrics & async_metrics,
        std::vector<ProtocolServerAdapter> & servers,
        bool start_servers = false);
        bool start_servers = false,
        const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));

    void updateServers(
        Poco::Util::AbstractConfiguration & config,

@@ -123,6 +126,11 @@ private:
        AsynchronousMetrics & async_metrics,
        std::vector<ProtocolServerAdapter> & servers,
        std::vector<ProtocolServerAdapter> & servers_to_start_before_tables);

    void stopServers(
        std::vector<ProtocolServerAdapter> & servers,
        const ServerType & server_type
    ) const;
};

}
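Taken together, the changes above thread a `ServerType` through server creation and shutdown: existing callers keep the old behavior via the `QUERIES_ALL` default, while the new SYSTEM commands pass a narrower type. A distilled sketch of the resulting control flow (the wrapper function names here are invented for illustration; only `createServers` and `stopServers` appear in the diff):

```cpp
// Hypothetical wrappers showing how the callbacks registered in Server.cpp use the new parameter.
void onStartListen(const ServerType & server_type)
{
    // createServers() skips every protocol for which server_type.shouldStart(...) is false.
    createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers,
                  /* start_servers= */ true, server_type);
}

void onStopListen(const ServerType & server_type)
{
    // stopServers() stops only servers whose port name matches server_type.shouldStop(...).
    stopServers(servers, server_type);
}
```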
@@ -187,6 +187,7 @@ enum class AccessType
    M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
    M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
    M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
    M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
    M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
    \
    M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
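The new `SYSTEM_LISTEN` entry makes the commands individually grantable. A hypothetical grant consistent with the aliases listed above (the user name is invented):

```sql
GRANT SYSTEM LISTEN ON *.* TO ops_user;
```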
@@ -1,4 +1,26 @@
#include "Allocator.h"

template class Allocator<false>;
template class Allocator<true>;
/** Keep the definition of this constant in the cpp file; otherwise its value
 * is inlined into allocator code, making it impossible to override it
 * in third-party code.
 *
 * Note: extern may seem redundant, but is actually needed due to a bug in GCC.
 * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
 */
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
 * In debug builds, use a small mmap threshold to reproduce more memory
 * stomping bugs. Along with ASLR it will hopefully detect more issues than
 * ASan. The program may fail due to the limit on the number of memory mappings.
 *
 * Not too small, to avoid exhausting memory mappings too quickly.
 */
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif

template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;
template class Allocator<true, true>;
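Because the definition is weak and lives in this translation unit, an embedding application can supply its own strong definition at link time, as the comment above intends. A minimal sketch (the chosen value is arbitrary):

```cpp
#include <cstddef>

// Strong definition in third-party code; it overrides ClickHouse's weak
// MMAP_THRESHOLD at link time. extern gives the const external linkage.
extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); // 256 MiB
```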
@@ -36,26 +36,51 @@
#include <Common/Allocator_fwd.h>


/// Required for older Darwin builds that lack a definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

/**
 * Many modern allocators (for example, tcmalloc) do not do a mremap for
 * realloc, even in the case of large enough chunks of memory, although mremap
 * could increase performance and reduce memory consumption during realloc.
 * To fix this, we do mremap manually if the chunk of memory is large enough.
 * The threshold (64 MB) is chosen quite large, since changing the address
 * space is very slow, especially in the case of a large number of threads. We
 * expect that the set of operations mmap/something to do/mremap can only be
 * performed about 1000 times per second.
 *
 * P.S. This is also required because tcmalloc cannot allocate a chunk of
 * memory greater than 16 GB.
 *
 * P.P.S. Note that the MMAP_THRESHOLD symbol is intentionally made weak. It allows
 * overriding it during linkage when using ClickHouse as a library in
 * third-party applications which may already use their own allocator doing mmaps
 * in the implementation of alloc/realloc.
 */
extern const size_t MMAP_THRESHOLD;

static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

namespace CurrentMetrics
{
    extern const Metric MMappedAllocs;
    extern const Metric MMappedAllocBytes;
}

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
    extern const int LOGICAL_ERROR;
}

}

/** Previously there was code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
 * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage of the mremap system call for large reallocs.
 * Actually jemalloc had support for mremap, but it was intentionally removed from the codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
 * Our performance tests also show that without manual mmap/mremap/munmap ClickHouse is overall faster by about 1-2% and up to 5-7x for some types of queries.
 * That is why we don't do manual mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
 */

/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
 * Also used in hash tables.
 * The interface is different from std::allocator
@@ -63,8 +88,10 @@ namespace ErrorCodes
 * - passing the size into the `free` method;
 * - by the presence of the `alignment` argument;
 * - the possibility of zeroing memory (used in hash tables);
 * - random hint address for mmap
 * - mmap_threshold for using mmap less or more
 */
template <bool clear_memory_>
template <bool clear_memory_, bool mmap_populate>
class Allocator
{
public:

@@ -82,7 +109,7 @@ public:
        try
        {
            checkSize(size);
            freeNoTrack(buf);
            freeNoTrack(buf, size);
            CurrentMemoryTracker::free(size);
        }
        catch (...)

@@ -105,26 +132,49 @@ public:
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        }
        else if (alignment <= MALLOC_MIN_ALIGNMENT)
        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
            && alignment <= MALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf)
            {
                DB::throwFromErrno(
                    fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }
                DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            buf = new_buf;
            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
        {
            /// Resize mmap'd memory region.
            CurrentMemoryTracker::realloc(old_size, new_size);

            // On Apple and FreeBSD a self-implemented mremap is used (common/mremap.h)
            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
                PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
                    ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else if (new_size < MMAP_THRESHOLD)
        {
            /// Small allocs that require a copy. Assume there's enough memory in the system. Call CurrentMemoryTracker once.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = allocNoTrack(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            freeNoTrack(buf, old_size);
            buf = new_buf;
        }
        else
        {
            /// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods.

            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
@@ -142,38 +192,83 @@ protected:

    static constexpr bool clear_memory = clear_memory_;

    // Freshly mmapped pages are copy-on-write references to a global zero page.
    // On the first write, a page fault occurs, and an actual writable page is
    // allocated. If we are going to use this memory soon, such as when resizing
    // hash tables, it makes sense to pre-fault the pages by passing
    // MAP_POPULATE to mmap(). This takes some time, but should be faster
    // overall than having a hot loop interrupted by page faults.
    // It is only supported on Linux.
    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
        | (mmap_populate ? MAP_POPULATE : 0)
#endif
        ;

private:
    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;
        if (alignment <= MALLOC_MIN_ALIGNMENT)
        {
            if constexpr (clear_memory)
                buf = ::calloc(size, 1);
            else
                buf = ::malloc(size);
        size_t mmap_min_alignment = ::getPageSize();

            if (nullptr == buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
        if (size >= MMAP_THRESHOLD)
        {
            if (alignment > mmap_min_alignment)
                throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
                    "Too large alignment {}: more than page size when allocating {}.",
                    ReadableSize(alignment), ReadableSize(size));

            buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
                mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            /// No need for zero-fill, because mmap guarantees it.

            CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
            CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
        }
        else
        {
            buf = nullptr;
            int res = posix_memalign(&buf, alignment, size);
            if (alignment <= MALLOC_MIN_ALIGNMENT)
            {
                if constexpr (clear_memory)
                    buf = ::calloc(size, 1);
                else
                    buf = ::malloc(size);

            if (0 != res)
                DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
                    DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
                if (nullptr == buf)
                    DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }
            else
            {
                buf = nullptr;
                int res = posix_memalign(&buf, alignment, size);

            if constexpr (clear_memory)
                memset(buf, 0, size);
                if (0 != res)
                    DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
                        DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

                if constexpr (clear_memory)
                    memset(buf, 0, size);
            }
        }
        return buf;
    }

    void freeNoTrack(void * buf)
    void freeNoTrack(void * buf, size_t size)
    {
        ::free(buf);
        if (size >= MMAP_THRESHOLD)
        {
            if (0 != munmap(buf, size))
                DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);

            CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
            CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
        }
        else
        {
            ::free(buf);
        }
    }

    void checkSize(size_t size)

@@ -182,6 +277,21 @@ private:
        if (size >= 0x8000000000000000ULL)
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
    }

#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void * getMmapHint()
    {
        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
    }
#else
    void * getMmapHint()
    {
        return nullptr;
    }
#endif
};
@@ -257,5 +367,7 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<

/// Prevent implicit template instantiation of Allocator

extern template class Allocator<false>;
extern template class Allocator<true>;
extern template class Allocator<false, false>;
extern template class Allocator<true, false>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;
@@ -3,7 +3,7 @@
 * This file provides forward declarations for Allocator.
 */

template <bool clear_memory_>
template <bool clear_memory_, bool mmap_populate = false>
class Allocator;

template <typename Base, size_t N = 64, size_t Alignment = 1>
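A hedged usage sketch of the size-aware interface implied by the hunks above (the exact signatures, the alignment default, and the threshold value are assumptions drawn from the diff, not confirmed declarations):

```cpp
Allocator<false, false> a;                        // no zero-fill, no MAP_POPULATE pre-faulting
void * p = a.alloc(1 << 20, 0);                   // 1 MiB: below MMAP_THRESHOLD, malloc path
p = a.realloc(p, 1 << 20, 256UL * (1 << 20), 0);  // crossing the threshold copies into an mmap region
a.free(p, 256UL * (1 << 20));                     // the size must be passed back so free() can munmap
```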
@@ -26,6 +26,14 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>

#if USE_SSL
#include <format>
#include <IO/BufferWithOwnMemory.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <boost/algorithm/hex.hpp>
#endif

#define PREPROCESSED_SUFFIX "-preprocessed"

namespace fs = std::filesystem;

@@ -39,6 +47,9 @@ namespace ErrorCodes
{
    extern const int FILE_DOESNT_EXIST;
    extern const int CANNOT_LOAD_CONFIG;
#if USE_SSL
    extern const int BAD_ARGUMENTS;
#endif
}

/// For cutting preprocessed path to this base

@@ -177,6 +188,72 @@ static void mergeAttributes(Element & config_element, Element & with_element)
    with_element_attributes->release();
}

#if USE_SSL

std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
{
    EncryptionMethod method = getEncryptionMethod(codec_name);
    CompressionCodecEncrypted codec(method);

    Memory<> memory;
    memory.resize(codec.getCompressedReserveSize(static_cast<UInt32>(value.size())));
    auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory.data());
    auto encrypted_value = std::string(memory.data(), bytes_written);
    std::string hex_value;
    boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
    return hex_value;
}

std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
{
    EncryptionMethod method = getEncryptionMethod(codec_name);
    CompressionCodecEncrypted codec(method);

    Memory<> memory;
    std::string encrypted_value;

    try
    {
        boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
    }
    catch (const std::exception &)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
    }

    memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()));
    codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
    std::string decrypted_value = std::string(memory.data(), memory.size());
    return decrypted_value;
}

void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
{
    for (Node * node = config_root->firstChild(); node; node = node->nextSibling())
    {
        if (node->nodeType() == Node::ELEMENT_NODE)
        {
            Element & element = dynamic_cast<Element &>(*node);
            if (element.hasAttribute("encryption_codec"))
            {
                const NodeListPtr children = element.childNodes();
                if (children->length() != 1)
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName());

                Node * text_node = node->firstChild();
                if (text_node->nodeType() != Node::TEXT_NODE)
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());

                auto encryption_codec = element.getAttribute("encryption_codec");
                text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue()));
            }
            decryptRecursive(node);
        }
    }
}

#endif

void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{
    const NodeListPtr with_nodes = with_root->childNodes();

@@ -694,7 +771,19 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
    return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
}

void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
#if USE_SSL

void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config)
{
    CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
    Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
    decryptRecursive(config_root);
    loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml);
}

#endif

void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir)
{
    try
    {

@@ -749,6 +838,12 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
    {
        LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
    }

#if USE_SSL
    std::string preprocessed_file_name = fs::path(preprocessed_path).filename();
    if (preprocessed_file_name == "config.xml" || preprocessed_file_name == std::format("config{}.xml", PREPROCESSED_SUFFIX))
        decryptEncryptedElements(loaded_config);
#endif
}

void ConfigProcessor::setConfigPath(const std::string & config_path)
@@ -97,7 +97,7 @@ public:

    /// Save preprocessed config to specified directory.
    /// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/
    void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir);
    void savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir);

    /// Set path of main config.xml. It will be cut from all configs placed to preprocessed_configs/
    static void setConfigPath(const std::string & config_path);

@@ -109,6 +109,14 @@ public:
    /// Is the file named as result of config preprocessing, not as original files.
    static bool isPreprocessedFile(const std::string & config_path);

#if USE_SSL
    /// Encrypt text value
    static std::string encryptValue(const std::string & codec_name, const std::string & value);

    /// Decrypt value
    static std::string decryptValue(const std::string & codec_name, const std::string & value);
#endif

    static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"};

private:

@@ -127,6 +135,13 @@ private:

    using NodePtr = Poco::AutoPtr<Poco::XML::Node>;

#if USE_SSL
    void decryptRecursive(Poco::XML::Node * config_root);

    /// Decrypt elements in config with specified encryption attributes
    void decryptEncryptedElements(LoadedConfig & loaded_config);
#endif

    void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);

    void merge(XMLDocumentPtr config, XMLDocumentPtr with);
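A hedged round trip using the static helpers declared above (assumes a codec such as the `aes_128_gcm_siv` from the documentation example has already been loaded into `CompressionCodecEncrypted::Configuration`):

```cpp
#if USE_SSL
// Encrypt a plaintext value to the hex form stored in config files, then decrypt it back.
std::string hex   = DB::ConfigProcessor::encryptValue("AES_128_GCM_SIV", "abcd");
std::string plain = DB::ConfigProcessor::decryptValue("AES_128_GCM_SIV", hex);   // == "abcd"
#endif
```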
@@ -110,9 +110,23 @@ namespace
            }
            else
            {
                Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
                parent_xml_node.appendChild(xml_key);
                processNode(value_node, *xml_key);
                if (key == "#text" && value_node.IsScalar())
                {
                    for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling())
                        if (child_node->nodeType() == Node::TEXT_NODE)
                            throw Exception(ErrorCodes::CANNOT_PARSE_YAML,
                                "YAMLParser has encountered node with several text nodes "
                                "and cannot continue parsing of the file");
                    std::string value = value_node.as<std::string>();
                    Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
                    parent_xml_node.appendChild(xml_value);
                }
                else
                {
                    Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
                    parent_xml_node.appendChild(xml_key);
                    processNode(value_node, *xml_key);
                }
            }
        }
        break;
@ -173,6 +173,8 @@
    M(PartsInMemory, "In-memory parts.") \
    M(MMappedFiles, "Total number of mmapped files.") \
    M(MMappedFileBytes, "Sum size of mmapped file regions.") \
    M(MMappedAllocs, "Total number of mmapped allocations") \
    M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
    M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
    M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
    M(KafkaConsumers, "Number of active Kafka consumers") \
@ -8,7 +8,7 @@
 * table, so it makes sense to pre-fault the pages so that page faults don't
 * interrupt the resize loop. Set the allocator parameter accordingly.
 */
using HashTableAllocator = Allocator<true /* clear_memory */>;
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;

template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;
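The new second template argument asks the allocator to pre-fault its mappings, which is what the comment above is about. A sketch of the underlying idea (illustrative, not code from this diff): MAP_POPULATE makes the kernel touch every page at mmap time, so the hash-table resize loop doesn't stall on per-page faults.

    #include <cstddef>
    #include <sys/mman.h>

    /// Allocate an anonymous, pre-faulted mapping (sketch).
    void * alloc_prefaulted(size_t size)
    {
        void * buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
        return buf == MAP_FAILED ? nullptr : buf;
    }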
@ -113,13 +113,19 @@ public:
        if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
        {
            memcpy(&n[0], p, 8);
            n[0] &= -1ULL >> s;
            if constexpr (std::endian::native == std::endian::little)
                n[0] &= -1ULL >> s;
            else
                n[0] &= -1ULL << s;
        }
        else
        {
            const char * lp = x.data + x.size - 8;
            memcpy(&n[0], lp, 8);
            n[0] >>= s;
            if constexpr (std::endian::native == std::endian::little)
                n[0] >>= s;
            else
                n[0] <<= s;
        }
        auto res = hash(k8);
        auto buck = getBucketFromHash(res);
@ -131,7 +137,10 @@ public:
        memcpy(&n[0], p, 8);
        const char * lp = x.data + x.size - 8;
        memcpy(&n[1], lp, 8);
        n[1] >>= s;
        if constexpr (std::endian::native == std::endian::little)
            n[1] >>= s;
        else
            n[1] <<= s;
        auto res = hash(k16);
        auto buck = getBucketFromHash(res);
        keyHolderDiscardKey(key_holder);
@ -142,7 +151,10 @@ public:
        memcpy(&n[0], p, 16);
        const char * lp = x.data + x.size - 8;
        memcpy(&n[2], lp, 8);
        n[2] >>= s;
        if constexpr (std::endian::native == std::endian::little)
            n[2] >>= s;
        else
            n[2] <<= s;
        auto res = hash(k24);
        auto buck = getBucketFromHash(res);
        keyHolderDiscardKey(key_holder);
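These hunks make the tail-masking endianness-aware: after memcpy-ing 8 bytes that overrun the key, the bytes to discard sit at the high-order end of the word on little-endian machines but at the low-order end on big-endian ones, so the shift direction must flip. A self-contained sketch of the same idea (assumes 0 < tail_bits < 64):

    #include <bit>
    #include <cstdint>

    /// Clear the trailing (overread) bytes of a word, portably.
    uint64_t mask_tail(uint64_t word, unsigned tail_bits)
    {
        if constexpr (std::endian::native == std::endian::little)
            return word & (~0ULL >> tail_bits);  /// trailing bytes occupy the high-order bits
        else
            return word & (~0ULL << tail_bits);  /// trailing bytes occupy the low-order bits
    }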
@ -27,15 +27,9 @@ struct Interval
};

template <typename IntervalStorageType>
bool operator<(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
auto operator<=>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator<=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right);
    return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
@ -44,24 +38,6 @@ bool operator==(const Interval<IntervalStorageType> & lhs, const Interval<Interv
    return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator!=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator>=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right);
}

struct IntervalTreeVoidValue
{
};
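This hunk is a mechanical C++20 cleanup: one operator<=> plus the existing operator== let the compiler rewrite <, <=, >, >= and !=, so the handwritten overloads can go. The same idiom in isolation (sketch):

    #include <compare>
    #include <tuple>

    struct Pair { int left; int right; };

    auto operator<=>(const Pair & lhs, const Pair & rhs)
    {
        return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right);
    }
    bool operator==(const Pair & lhs, const Pair & rhs)
    {
        return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
    }

    /// `Pair{1, 2} < Pair{1, 3}` is rewritten by the compiler as `(lhs <=> rhs) < 0`.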
@ -43,6 +43,17 @@ struct PreformattedMessage
    operator const std::string & () const { return text; }
    operator std::string () && { return std::move(text); }
    operator fmt::format_string<> () const { UNREACHABLE(); }

    void apply(std::string & out_text, std::string_view & out_format_string) const &
    {
        out_text = text;
        out_format_string = format_string;
    }
    void apply(std::string & out_text, std::string_view & out_format_string) &&
    {
        out_text = std::move(text);
        out_format_string = format_string;
    }
};

template <typename... Args>
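The two ref-qualified apply overloads let callers copy from an lvalue but steal the text from a temporary. A sketch (PreformattedMessage::create appears in the unit test later in this diff):

    void apply_example()
    {
        PreformattedMessage msg = PreformattedMessage::create("read {} rows", 42);
        std::string text;
        std::string_view format_string;
        std::move(msg).apply(text, format_string); /// moves: text == "read 42 rows", format_string == "read {} rows"
    }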
@ -99,10 +110,33 @@ template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x
    }
}

/// Constexpr ifs are not like ifdefs, and compiler still checks that unneeded code can be compiled
/// This template is useful to avoid compilation failures when condition of some "constexpr if" is false
template<bool enable> struct ConstexprIfsAreNotIfdefs
{
    template <typename T> constexpr static std::string_view getStaticFormatString(T &&) { return {}; }
    template <typename T> static PreformattedMessage getPreformatted(T &&) { return {}; }
};

template<> struct ConstexprIfsAreNotIfdefs<true>
{
    template <typename T> consteval static std::string_view getStaticFormatString(T && x)
    {
        /// See tryGetStaticFormatString(...)
        static_assert(!std::is_same_v<std::string, std::decay_t<T>>);
        static_assert(std::is_nothrow_convertible<T, const char * const>::value);
        static_assert(!std::is_pointer<T>::value);
        return std::string_view(x);
    }

    template <typename T> static T && getPreformatted(T && x) { return std::forward<T>(x); }
};

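The point of the trait: a discarded if constexpr branch in non-template code is still parsed and type-checked, so a direct consteval call with unsatisfiable static_asserts would break the build even when the condition is false. Dispatching through ConstexprIfsAreNotIfdefs<false> instead selects the harmless stubs above. In miniature:

    void example(const char * runtime_str)
    {
        constexpr bool is_static = false;
        if constexpr (is_static)
        {
            /// With a direct consteval call this branch could fail to compile even
            /// though it is discarded; the <false> specialization is a no-op.
            auto fmt = ConstexprIfsAreNotIfdefs<is_static>::getStaticFormatString(runtime_str);
            (void)fmt;
        }
    }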
template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); }
template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); }
/// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
template <typename T, typename... Ts> constexpr auto firstArg(fmt::basic_runtime<T> && data, Ts &&...) { return data.str.data(); }
template <typename T, typename... Ts> constexpr auto firstArg(const fmt::basic_runtime<T> & data, Ts &&...) { return data.str.data(); }

consteval ssize_t formatStringCountArgsNum(const char * const str, size_t len)
{
@ -142,26 +176,19 @@ consteval void formatStringCheckArgsNumImpl(std::string_view str, size_t nargs)
        functionThatFailsCompilationOfConstevalFunctions("unexpected number of arguments in a format string");
}

template <typename... Args>
struct CheckArgsNumHelperImpl
template<typename T>
consteval void formatStringCheckArgsNum(T && str, size_t nargs)
{
    template<typename T>
    consteval CheckArgsNumHelperImpl(T && str)
    {
        formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), sizeof...(Args));
    }

    /// No checks for fmt::runtime and PreformattedMessage
    template<typename T> CheckArgsNumHelperImpl(fmt::basic_runtime<T> &&) {}
    template<> CheckArgsNumHelperImpl(PreformattedMessage &) {}
    template<> CheckArgsNumHelperImpl(const PreformattedMessage &) {}
    template<> CheckArgsNumHelperImpl(PreformattedMessage &&) {}

};

template <typename... Args> using CheckArgsNumHelper = CheckArgsNumHelperImpl<std::type_identity_t<Args>...>;
template <typename... Args> void formatStringCheckArgsNum(CheckArgsNumHelper<Args...>, Args &&...) {}
    formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), nargs);
}
template<typename T> inline void formatStringCheckArgsNum(fmt::basic_runtime<T> &&, size_t) {}
template<> inline void formatStringCheckArgsNum(PreformattedMessage &, size_t) {}
template<> inline void formatStringCheckArgsNum(const PreformattedMessage &, size_t) {}
template<> inline void formatStringCheckArgsNum(PreformattedMessage &&, size_t) {}

template<typename T> struct FormatStringTypeInfo{ static constexpr bool is_static = true; static constexpr bool has_format = true; };
template<typename T> struct FormatStringTypeInfo<fmt::basic_runtime<T>> { static constexpr bool is_static = false; static constexpr bool has_format = false; };
template<> struct FormatStringTypeInfo<PreformattedMessage> { static constexpr bool is_static = false; static constexpr bool has_format = true; };

/// This wrapper helps to avoid too frequent and noisy log messages.
/// For each pair (logger_name, format_string) it remembers when such a message was logged the last time.
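Net effect of the rewrite: formatStringCheckArgsNum is now an ordinary consteval function taking the argument count, instead of a helper type deduced from the call site. Assuming the definitions above:

    void check_examples()
    {
        formatStringCheckArgsNum("rows {} bytes {}", 2);      /// OK: two placeholders, two arguments
        /// formatStringCheckArgsNum("rows {} bytes {}", 1);  /// would fail to compile
        formatStringCheckArgsNum(fmt::runtime("rows {}"), 5); /// no-op overload: runtime strings are not checked
    }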
@ -101,9 +101,6 @@ void ProgressIndication::writeFinalProgress()
            << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
    else
        std::cout << ". ";
    auto peak_memory_usage = getMemoryUsage().peak;
    if (peak_memory_usage >= 0)
        std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}

void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
@ -82,3 +82,8 @@ endif()

clickhouse_add_executable (interval_tree interval_tree.cpp)
target_link_libraries (interval_tree PRIVATE dbms)

if (ENABLE_SSL)
    clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
    target_link_libraries (encrypt_decrypt PRIVATE dbms)
endif()

src/Common/examples/encrypt_decrypt.cpp (new file, 61 lines)
@ -0,0 +1,61 @@
#include <Common/Config/ConfigProcessor.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <iostream>

/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV.
  * Keys for codecs are loaded from <encryption_codecs> section of configuration file.
  *
  * How to use:
  *  ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt
  */

int main(int argc, char ** argv)
{
    try
    {
        if (argc != 5)
        {
            std::cerr << "Usage:" << std::endl
                << " " << argv[0] << " path action codec value" << std::endl
                << "path: path to configuration file." << std::endl
                << "action: -e for encryption and -d for decryption." << std::endl
                << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." << std::endl << std::endl
                << "Example:" << std::endl
                << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt";
            return 3;
        }

        std::string action = argv[2];
        std::string codec_name = argv[3];
        std::string value = argv[4];

        DB::ConfigProcessor processor(argv[1], false, true);
        auto loaded_config = processor.loadConfig();
        DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");

        if (action == "-e")
            std::cout << processor.encryptValue(codec_name, value) << std::endl;
        else if (action == "-d")
            std::cout << processor.decryptValue(codec_name, value) << std::endl;
        else
            std::cerr << "Unknown action: " << action << std::endl;
    }
    catch (Poco::Exception & e)
    {
        std::cerr << "Exception: " << e.displayText() << std::endl;
        return 1;
    }
    catch (std::exception & e)
    {
        std::cerr << "std::exception: " << e.what() << std::endl;
        return 3;
    }
    catch (...)
    {
        std::cerr << "Some exception" << std::endl;
        return 2;
    }

    return 0;
}
@ -1,7 +1,7 @@
#pragma once

/// Macros for convenient usage of Poco logger.

#include <unistd.h>
#include <fmt/format.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
@ -28,33 +28,86 @@ namespace

#define LOG_IMPL_FIRST_ARG(X, ...) X

/// Copy-paste from contrib/libpq/include/c.h
/// There's no easy way to count the number of arguments without evaluating these arguments...
#define CH_VA_ARGS_NARGS(...) \
    CH_VA_ARGS_NARGS_(__VA_ARGS__, \
                      63,62,61,60, \
                      59,58,57,56,55,54,53,52,51,50, \
                      49,48,47,46,45,44,43,42,41,40, \
                      39,38,37,36,35,34,33,32,31,30, \
                      29,28,27,26,25,24,23,22,21,20, \
                      19,18,17,16,15,14,13,12,11,10, \
                      9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define CH_VA_ARGS_NARGS_( \
    _01,_02,_03,_04,_05,_06,_07,_08,_09,_10, \
    _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
    _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
    _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
    _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
    _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
    _61,_62,_63, N, ...) \
    (N)

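This is the classic preprocessor argument-counting trick: the caller's arguments shift the descending number list so that N lands on the count; the arguments themselves are never evaluated. For example:

    static_assert(CH_VA_ARGS_NARGS(a) == 1);
    static_assert(CH_VA_ARGS_NARGS(a, b, c) == 3);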
#define LINE_NUM_AS_STRING_IMPL2(x) #x
#define LINE_NUM_AS_STRING_IMPL(x) LINE_NUM_AS_STRING_IMPL2(x)
#define LINE_NUM_AS_STRING LINE_NUM_AS_STRING_IMPL(__LINE__)
#define MESSAGE_FOR_EXCEPTION_ON_LOGGING "Failed to write a log message: " __FILE__ ":" LINE_NUM_AS_STRING "\n"

/// Logs a message to a specified logger with that level.
/// If more than one argument is provided,
///  the first argument is interpreted as a template with {}-substitutions
///  and the latter arguments are treated as values to substitute.
/// If only one argument is provided, it is treated as a message without substitutions.

#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
    auto _logger = ::getLogger(logger); \
    const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
        (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
    if (_is_clients_log || _logger->is((PRIORITY))) \
    { \
        std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \
        formatStringCheckArgsNum(__VA_ARGS__); \
        if (auto _channel = _logger->getChannel()) \
        { \
            std::string file_function; \
            file_function += __FILE__; \
            file_function += "; "; \
            file_function += __PRETTY_FUNCTION__; \
            Poco::Message poco_message(_logger->name(), formatted_message, \
                (PRIORITY), file_function.c_str(), __LINE__, tryGetStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__))); \
            _channel->log(poco_message); \
        } \
        ProfileEvents::incrementForLogMessage(PRIORITY); \
    } \
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
    auto _logger = ::getLogger(logger); \
    const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
        (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
    if (!_is_clients_log && !_logger->is((PRIORITY))) \
        break; \
    \
    try \
    { \
        ProfileEvents::incrementForLogMessage(PRIORITY); \
        auto _channel = _logger->getChannel(); \
        if (!_channel) \
            break; \
        \
        constexpr size_t _nargs = CH_VA_ARGS_NARGS(__VA_ARGS__); \
        using LogTypeInfo = FormatStringTypeInfo<std::decay_t<decltype(LOG_IMPL_FIRST_ARG(__VA_ARGS__))>>; \
        \
        std::string_view _format_string; \
        std::string _formatted_message; \
        \
        if constexpr (LogTypeInfo::is_static) \
        { \
            formatStringCheckArgsNum(LOG_IMPL_FIRST_ARG(__VA_ARGS__), _nargs - 1); \
            _format_string = ConstexprIfsAreNotIfdefs<LogTypeInfo::is_static>::getStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__)); \
        } \
        \
        constexpr bool is_preformatted_message = !LogTypeInfo::is_static && LogTypeInfo::has_format; \
        if constexpr (is_preformatted_message) \
        { \
            static_assert(_nargs == 1 || !is_preformatted_message); \
            ConstexprIfsAreNotIfdefs<is_preformatted_message>::getPreformatted(LOG_IMPL_FIRST_ARG(__VA_ARGS__)).apply(_formatted_message, _format_string); \
        } \
        else \
        { \
            _formatted_message = _nargs == 1 ? firstArg(__VA_ARGS__) : fmt::format(__VA_ARGS__); \
        } \
        \
        std::string _file_function = __FILE__ "; "; \
        _file_function += __PRETTY_FUNCTION__; \
        Poco::Message _poco_message(_logger->name(), std::move(_formatted_message), \
            (PRIORITY), _file_function.c_str(), __LINE__, _format_string); \
        _channel->log(_poco_message); \
    } \
    catch (...) \
    { \
        ::write(STDERR_FILENO, static_cast<const void *>(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \
    } \
} while (false)

@ -1,6 +1,7 @@
#include <string>
#include <vector>
#include <Common/logger_useful.h>
#include <Common/thread_local_rng.h>
#include <gtest/gtest.h>

#include <Poco/Logger.h>
@ -50,3 +51,55 @@ TEST(Logger, TestLog)
    }

}

static size_t global_counter = 0;

static std::string getLogMessage()
{
    ++global_counter;
    return "test1 " + std::to_string(thread_local_rng());
}

static size_t getLogMessageParam()
{
    ++global_counter;
    return thread_local_rng();
}

static PreformattedMessage getPreformatted()
{
    ++global_counter;
    return PreformattedMessage::create("test3 {}", thread_local_rng());
}

static size_t getLogMessageParamOrThrow()
{
    size_t x = thread_local_rng();
    if (x % 1000 == 0)
        return x;
    throw Poco::Exception("error", 42);
}

TEST(Logger, SideEffects)
{
    std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    auto my_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(oss));
    auto * log = &Poco::Logger::create("Logger", my_channel.get());
    log->setLevel("trace");

    /// Ensure that parameters are evaluated only once
    global_counter = 0;
    LOG_TRACE(log, fmt::runtime(getLogMessage()));
    EXPECT_EQ(global_counter, 1);
    LOG_TRACE(log, "test2 {}", getLogMessageParam());
    EXPECT_EQ(global_counter, 2);
    LOG_TRACE(log, getPreformatted());
    EXPECT_EQ(global_counter, 3);

    auto var = PreformattedMessage::create("test4 {}", thread_local_rng());
    LOG_TRACE(log, var);
    EXPECT_EQ(var.text.starts_with("test4 "), true);
    EXPECT_EQ(var.format_string, "test4 {}");

    LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow());
}
@ -28,6 +28,17 @@ namespace DB
namespace ErrorCodes
{
    extern const int OPENSSL_ERROR;
    extern const int BAD_ARGUMENTS;
}

EncryptionMethod getEncryptionMethod(const std::string & name)
{
    if (name == "AES_128_GCM_SIV")
        return AES_128_GCM_SIV;
    else if (name == "AES_256_GCM_SIV")
        return AES_256_GCM_SIV;
    else
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", name);
}

namespace
@ -63,7 +74,7 @@ uint8_t getMethodCode(EncryptionMethod Method)
    }
    else
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
    }
}

@ -79,7 +90,6 @@ namespace ErrorCodes
{
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
    extern const int INCORRECT_DATA;
}

@ -104,7 +114,7 @@ UInt64 methodKeySize(EncryptionMethod Method)
    }
    else
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
    }
}

@ -129,7 +139,7 @@ auto getMethod(EncryptionMethod Method)
    }
    else
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
    }
}

@ -205,7 +215,7 @@ auto getMethod(EncryptionMethod Method)
    }
    else
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
    }
}

@ -578,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method,
    if (current_params->keys_storage[method].contains(key_id))
        key = current_params->keys_storage[method].at(key_id);
    else
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption codec", key_id, getMethodName(method));

    return key;
}
@ -18,6 +18,9 @@ enum EncryptionMethod
    MAX_ENCRYPTION_METHOD
};

/// Get method for string name. Throw exception for wrong name.
EncryptionMethod getEncryptionMethod(const std::string & name);

/** This codec encrypts and decrypts blocks with AES-128 in
  * GCM-SIV mode (RFC-8452), which is the only cipher currently
  * supported. Although it is implemented as a compression codec
@ -659,7 +659,8 @@ class IColumn;
    M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
    M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
    \
    M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
    M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
    M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \
    M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
    \
    M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
@ -673,6 +674,7 @@ class IColumn;
    M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
    M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
    M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
    M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \
    \
    M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
    M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
    {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
    {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
              {"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},

@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown()

    for (const auto & kv : tables_snapshot)
    {
        kv.second->flush();
        kv.second->flushAndPrepareForShutdown();
    }

    for (const auto & kv : tables_snapshot)
@ -9,7 +9,8 @@
#include <Common/assert_cast.h>
#include <base/sleep.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>


namespace ProfileEvents
{
@ -40,11 +41,17 @@ enum class FunctionSleepVariant
template <FunctionSleepVariant variant>
class FunctionSleep : public IFunction
{
private:
    UInt64 max_microseconds;
public:
    static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? "sleep" : "sleepEachRow";
    static FunctionPtr create(ContextPtr)
    static FunctionPtr create(ContextPtr context)
    {
        return std::make_shared<FunctionSleep<variant>>(context->getSettingsRef().function_sleep_max_microseconds_per_block);
    }

    FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_)
    {
        return std::make_shared<FunctionSleep<variant>>();
    }

    /// Get the name of the function.
@ -105,13 +112,19 @@ public:
        if (size > 0)
        {
            /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time.
            if (seconds > 3.0)   /// The choice is arbitrary
                throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));
            if (max_microseconds && seconds * 1e6 > max_microseconds)
                throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds);

            if (!dry_run)
            {
                UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
                UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);

                if (max_microseconds && microseconds > max_microseconds)
                    throw Exception(ErrorCodes::TOO_SLOW,
                        "The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
                        max_microseconds, microseconds, size);

                sleepForMicroseconds(microseconds);
                ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
                ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
@ -40,9 +40,10 @@ struct ReadProgress
    UInt64 read_rows = 0;
    UInt64 read_bytes = 0;
    UInt64 total_rows_to_read = 0;
    UInt64 total_bytes_to_read = 0;

    ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
        : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
    ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
        : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
};

struct WriteProgress
@ -98,8 +99,8 @@ struct Progress

    Progress() = default;

    Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
        : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
    Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
        : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}

    explicit Progress(ReadProgress read_progress)
        : read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {}
@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context)

    setProfileCallback([file_progress_callback](const ProfileInfo & progress)
    {
        file_progress_callback(FileProgress(progress.bytes_read, 0));
        file_progress_callback(FileProgress(progress.bytes_read));
    });
}

@ -2020,7 +2020,8 @@ template <typename Method, bool use_compiled_functions, bool return_single_block
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE
Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const
{
    const size_t max_block_size = params.max_block_size;
    /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated
    const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
    const bool final = true;
    ConvertToBlockRes<return_single_block> res;

@ -2097,7 +2098,8 @@ template <bool return_single_block, typename Method, typename Table>
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE
Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const
{
    const size_t max_block_size = params.max_block_size;
    /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated
    const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
    const bool final = false;
    ConvertToBlockRes<return_single_block> res;

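For instance, with params.max_block_size = 65536 and a table of 100000 keys, the multi-block path now preallocates min(65536, 100000) + 1 = 65537 rows per block, while return_single_block preallocates 100001; previously a 1000-key table would still have preallocated 65536 rows.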
@ -21,6 +21,7 @@
#include <Core/BackgroundSchedulePool.h>
#include <Formats/FormatFactory.h>
#include <Databases/IDatabase.h>
#include <Server/ServerType.h>
#include <Storages/IStorage.h>
#include <Storages/MarkCache.h>
#include <Storages/MergeTree/MergeList.h>
@ -357,6 +358,9 @@ struct ContextSharedPart : boost::noncopyable

    Context::ConfigReloadCallback config_reload_callback;

    Context::StartStopServersCallback start_servers_callback;
    Context::StartStopServersCallback stop_servers_callback;

    bool is_server_completely_started = false;

#if USE_ROCKSDB
@ -3688,6 +3692,36 @@ void Context::reloadConfig() const
    shared->config_reload_callback();
}

void Context::setStartServersCallback(StartStopServersCallback && callback)
{
    /// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
    shared->start_servers_callback = std::move(callback);
}

void Context::setStopServersCallback(StartStopServersCallback && callback)
{
    /// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
    shared->stop_servers_callback = std::move(callback);
}

void Context::startServers(const ServerType & server_type) const
{
    /// Use mutex if callback may be changed after startup.
    if (!shared->start_servers_callback)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set.");

    shared->start_servers_callback(server_type);
}

void Context::stopServers(const ServerType & server_type) const
{
    /// Use mutex if callback may be changed after startup.
    if (!shared->stop_servers_callback)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set.");

    shared->stop_servers_callback(server_type);
}


void Context::shutdown()
{
@ -134,6 +134,7 @@ using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
class StoragePolicySelector;
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
class ServerType;
template <class Queue>
class MergeTreeBackgroundExecutor;

@ -1057,6 +1058,13 @@ public:
    void setConfigReloadCallback(ConfigReloadCallback && callback);
    void reloadConfig() const;

    using StartStopServersCallback = std::function<void(const ServerType &)>;
    void setStartServersCallback(StartStopServersCallback && callback);
    void setStopServersCallback(StartStopServersCallback && callback);

    void startServers(const ServerType & server_type) const;
    void stopServers(const ServerType & server_type) const;

    void shutdown();

    bool isInternalQuery() const { return is_internal_query; }
@ -349,6 +349,15 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(

    DatabasePtr database;
    {
        // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part.
        // So, fail early and gracefully...
        if (!table_id.hasDatabase())
        {
            if (exception)
                exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name"));
            return {};
        }

        std::lock_guard lock{databases_mutex};
        auto it = databases.find(table_id.getDatabaseName());
        if (databases.end() == it)
@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
    std::vector<std::pair<String, bool>> tables_to_drop;
    for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
    {
        iterator->table()->flush();
        iterator->table()->flushAndPrepareForShutdown();
        tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()});
    }

@ -556,6 +556,14 @@ BlockIO InterpreterSystemQuery::execute()
            );
            break;
        }
        case Type::STOP_LISTEN:
            getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
            getContext()->stopServers(query.server_type);
            break;
        case Type::START_LISTEN:
            getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
            getContext()->startServers(query.server_type);
            break;
        case Type::FLUSH_ASYNC_INSERT_QUEUE:
        {
            getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
@ -567,9 +575,6 @@ BlockIO InterpreterSystemQuery::execute()
            queue->flushAll();
            break;
        }
        case Type::STOP_LISTEN_QUERIES:
        case Type::START_LISTEN_QUERIES:
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type);
        case Type::STOP_THREAD_FUZZER:
            getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER);
            ThreadFuzzer::stop();
@ -1181,8 +1186,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
            required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE);
            break;
        }
        case Type::STOP_LISTEN_QUERIES:
        case Type::START_LISTEN_QUERIES:
        case Type::STOP_LISTEN:
        case Type::START_LISTEN:
        {
            required_access.emplace_back(AccessType::SYSTEM_LISTEN);
            break;
        }
        case Type::STOP_THREAD_FUZZER:
        case Type::START_THREAD_FUZZER:
        case Type::ENABLE_FAILPOINT:
@ -220,6 +220,17 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
    {
        settings.ostr << (settings.hilite ? hilite_none : "");
    }
    else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN)
    {
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type)
                      << (settings.hilite ? hilite_none : "");

        if (server_type.type == ServerType::CUSTOM)
        {
            settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name);
        }

    }
}
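With this branch the formatter emits, e.g., SYSTEM STOP LISTEN HTTP, or SYSTEM START LISTEN CUSTOM `my_protocol` for custom protocols (the protocol name here is illustrative; serverTypeToString replaces underscores with spaces, as shown in ServerType.cpp below).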
@ -3,6 +3,7 @@
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/IAST.h>
#include <Parsers/SyncReplicaMode.h>
#include <Server/ServerType.h>

#include "config.h"

@ -35,8 +36,8 @@ public:
#if USE_AWS_S3
        DROP_S3_CLIENT_CACHE,
#endif
        STOP_LISTEN_QUERIES,
        START_LISTEN_QUERIES,
        STOP_LISTEN,
        START_LISTEN,
        RESTART_REPLICAS,
        RESTART_REPLICA,
        RESTORE_REPLICA,
@ -116,6 +117,8 @@ public:

    SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT;

    ServerType server_type;

    String getID(char) const override { return "SYSTEM query"; }

    ASTPtr clone() const override
@ -442,6 +442,42 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
            break;
        }

        case Type::START_LISTEN:
        case Type::STOP_LISTEN:
        {
            if (!parseQueryWithOnCluster(res, pos, expected))
                return false;

            ServerType::Type current_type = ServerType::Type::END;
            std::string current_custom_name;

            for (const auto & type : magic_enum::enum_values<ServerType::Type>())
            {
                if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected))
                {
                    current_type = type;
                    break;
                }
            }

            if (current_type == ServerType::Type::END)
                return false;

            if (current_type == ServerType::CUSTOM)
            {
                ASTPtr ast;

                if (!ParserStringLiteral{}.parse(pos, ast, expected))
                    return false;

                current_custom_name = ast->as<ASTLiteral &>().value.get<const String &>();
            }

            res->server_type = ServerType(current_type, current_custom_name);

            break;
        }

        default:
        {
            if (!parseQueryWithOnCluster(res, pos, expected))
@ -3,8 +3,8 @@ set(SRCS)
clickhouse_add_executable(lexer lexer.cpp ${SRCS})
target_link_libraries(lexer PRIVATE clickhouse_parsers)

clickhouse_add_executable(select_parser select_parser.cpp ${SRCS})
clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp")
target_link_libraries(select_parser PRIVATE clickhouse_parsers)

clickhouse_add_executable(create_parser create_parser.cpp ${SRCS})
clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp")
target_link_libraries(create_parser PRIVATE clickhouse_parsers)
@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_
            if (read_progress->counters.total_rows_approx)
                read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx);

            if (read_progress->counters.total_bytes)
                read_progress_callback->addTotalBytes(read_progress->counters.total_bytes);

            if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits))
                node->processor->cancel();
        }
@ -85,7 +85,7 @@ private:
    size_t num_errors = 0;

    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;
};

}

@ -50,7 +50,7 @@ private:
    int record_batch_current = 0;

    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;

    const FormatSettings format_settings;

@ -67,7 +67,7 @@ protected:
    Serializations serializations;
    std::unique_ptr<JSONColumnsReaderBase> reader;
    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;
};

@ -66,7 +66,7 @@ private:
    std::unique_ptr<NativeReader> reader;
    Block header;
    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;
};

class NativeOutputFormat final : public IOutputFormat

@ -52,7 +52,7 @@ private:
    std::vector<int> include_indices;

    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;

    const FormatSettings format_settings;
    const std::unordered_set<int> & skip_stripes;

@ -202,7 +202,7 @@ private:
    const size_t max_block_size;

    BlockMissingValues last_block_missing_values;
    size_t last_approx_bytes_read_for_chunk;
    size_t last_approx_bytes_read_for_chunk = 0;

    /// Non-atomic because it is used in one thread.
    std::optional<size_t> next_block_in_current_unit;

@ -273,7 +273,7 @@ private:
    std::unique_ptr<ThreadPool> pool;

    BlockMissingValues previous_block_missing_values;
    size_t previous_approx_bytes_read_for_chunk;
    size_t previous_approx_bytes_read_for_chunk = 0;

    std::exception_ptr background_exception = nullptr;
    std::atomic<int> is_stopped{0};

@ -96,7 +96,7 @@ private:
    Serializations serializations;

    BlockMissingValues block_missing_values;
    size_t approx_bytes_read_for_chunk;
    size_t approx_bytes_read_for_chunk = 0;
};

class ValuesSchemaReader : public IRowSchemaReader

@ -343,6 +343,7 @@ public:
        uint64_t read_rows = 0;
        uint64_t read_bytes = 0;
        uint64_t total_rows_approx = 0;
        uint64_t total_bytes = 0;
    };

    struct ReadProgress

@ -43,6 +43,7 @@ public:
    std::optional<ReadProgress> getReadProgress() final;

    void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; }
    void addTotalBytes(size_t value) { read_progress.total_bytes += value; }
};

using SourcePtr = std::shared_ptr<ISource>;
@ -54,7 +54,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines

bool JoinStep::allowPushDownToRight() const
{
    return join->pipelineType() == JoinPipelineType::YShaped;
    return join->pipelineType() == JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst;
}

void JoinStep::describePipeline(FormatSettings & settings) const
@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
        if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind)
            return 0;

        /// There is no ASOF Right join, so we're talking about pushing to the right side
        if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof)
            return 0;

        bool is_left = kind == JoinKind::Left;
        const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header;
        const auto & res_header = child->getOutputStream().header;
@ -137,6 +137,69 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts)
    return true;
}

/// build sort description for output stream
static void updateSortDescriptionForOutputStream(
    DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info)
{
    /// Updating sort description can be done after PREWHERE actions are applied to the header.
    /// After PREWHERE actions are applied, column names in header can differ from storage column names due to aliases
    /// To mitigate it, we're trying to build original header and use it to deduce sorting description
    /// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization
    Block original_header = output_stream.header.cloneEmpty();
    if (prewhere_info)
    {
        if (prewhere_info->prewhere_actions)
        {
            FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions);
            for (auto & column : original_header)
            {
                const auto * original_node = original_column_finder.find(column.name);
                if (original_node)
                    column.name = original_node->result_name;
            }
        }

        if (prewhere_info->row_level_filter)
        {
            FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter);
            for (auto & column : original_header)
            {
                const auto * original_node = original_column_finder.find(column.name);
                if (original_node)
                    column.name = original_node->result_name;
            }
        }
    }

    SortDescription sort_description;
    const Block & header = output_stream.header;
    for (const auto & sorting_key : sorting_key_columns)
    {
        const auto it = std::find_if(
            original_header.begin(), original_header.end(), [&sorting_key](const auto & column) { return column.name == sorting_key; });
        if (it == original_header.end())
            break;

        const size_t column_pos = std::distance(original_header.begin(), it);
        sort_description.emplace_back((header.begin() + column_pos)->name, sort_direction);
    }

    if (!sort_description.empty())
    {
        if (input_order_info)
        {
            output_stream.sort_scope = DataStream::SortScope::Stream;
            const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size;
            if (sort_description.size() > used_prefix_of_sorting_key_size)
                sort_description.resize(used_prefix_of_sorting_key_size);
        }
        else
            output_stream.sort_scope = DataStream::SortScope::Chunk;
    }

    output_stream.sort_description = std::move(sort_description);
}

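As a hypothetical illustration of the alias handling above: if PREWHERE actions expose storage column a under the output name b (a AS b), original_header maps b back to a, so a sorting key on a still matches, and the sort description is emitted under the output name b taken from the real header at the same position.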
void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const
{

@ -250,33 +313,12 @@ ReadFromMergeTree::ReadFromMergeTree(
    /// Add explicit description.
    setStepDescription(data.getStorageID().getFullNameNotQuoted());

    { /// build sort description for output stream
        SortDescription sort_description;
        const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns();
        const Block & header = output_stream->header;
        const int sort_direction = getSortDirection();
        for (const auto & column_name : sorting_key_columns)
        {
            if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; })
                == header.end())
                break;
            sort_description.emplace_back(column_name, sort_direction);
        }
        if (!sort_description.empty())
        {
            if (query_info.getInputOrderInfo())
            {
                output_stream->sort_scope = DataStream::SortScope::Stream;
                const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size;
                if (sort_description.size() > used_prefix_of_sorting_key_size)
                    sort_description.resize(used_prefix_of_sorting_key_size);
            }
            else
                output_stream->sort_scope = DataStream::SortScope::Chunk;
        }

        output_stream->sort_description = std::move(sort_description);
    }
    updateSortDescriptionForOutputStream(
        *output_stream,
        storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(),
        getSortDirection(),
        query_info.getInputOrderInfo(),
        prewhere_info);
}


@ -1564,6 +1606,12 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info
        prewhere_info_value,
        data.getPartitionValueType(),
        virt_column_names)};
    updateSortDescriptionForOutputStream(
        *output_stream,
        storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(),
        getSortDirection(),
        query_info.getInputOrderInfo(),
        prewhere_info);
}

bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort()
@ -77,6 +77,8 @@ std::optional<Chunk> RemoteSource::tryGenerate()
        {
            if (value.total_rows_to_read)
                addTotalRowsApprox(value.total_rows_to_read);
            if (value.total_bytes_to_read)
                addTotalBytes(value.total_bytes_to_read);
            progress(value.read_rows, value.read_bytes);
        });

@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c
        process_list_elem->updateProgressIn(total_rows_progress);
    }

    size_t bytes = 0;
    if ((bytes = total_bytes.exchange(0)) != 0)
    {
        Progress total_bytes_progress = {0, 0, 0, bytes};

        if (progress_callback)
            progress_callback(total_bytes_progress);

        if (process_list_elem)
            process_list_elem->updateProgressIn(total_bytes_progress);
    }

    Progress value {read_rows, read_bytes};

    if (progress_callback)
@ -23,6 +23,7 @@ public:
    void setProcessListElement(QueryStatusPtr elem);
    void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; }
    void addTotalRowsApprox(size_t value) { total_rows_approx += value; }
    void addTotalBytes(size_t value) { total_bytes += value; }

    /// Skip updating profile events.
    /// For merges in mutations it may need special logic, it's done inside ProgressCallback.
@ -37,6 +38,8 @@ private:

    /// The approximate total number of rows to read. For progress bar.
    std::atomic_size_t total_rows_approx = 0;
    /// The total number of bytes to read. For progress bar.
    std::atomic_size_t total_bytes = 0;

    std::mutex limits_and_quotas_mutex;
    Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time
@ -591,8 +591,8 @@ void RemoteQueryExecutor::finish()
    /// Send the request to abort the execution of the request, if not already sent.
    tryCancel("Cancelling query because enough data has been read");

    /// If connections weren't created yet or query wasn't sent, nothing to do.
    if (!connections || !sent_query)
    /// If connections weren't created yet, query wasn't sent or was already finished, nothing to do.
    if (!connections || !sent_query || finished)
        return;

    /// Get the remaining packets so that there is no out of sync in the connections to the replicas.

src/Server/ServerType.cpp (new file, 138 lines)
@ -0,0 +1,138 @@
#include <Server/ServerType.h>

#include <vector>
#include <algorithm>
#include <base/types.h>

#include <magic_enum.hpp>

namespace DB
{

namespace
{
    std::vector<std::string> getTypeIndexToTypeName()
    {
        constexpr std::size_t types_size = magic_enum::enum_count<ServerType::Type>();

        std::vector<std::string> type_index_to_type_name;
        type_index_to_type_name.resize(types_size);

        auto entries = magic_enum::enum_entries<ServerType::Type>();
        for (const auto & [entry, str] : entries)
        {
            auto str_copy = String(str);
            std::replace(str_copy.begin(), str_copy.end(), '_', ' ');
            type_index_to_type_name[static_cast<UInt64>(entry)] = std::move(str_copy);
        }

        return type_index_to_type_name;
    }
}

const char * ServerType::serverTypeToString(ServerType::Type type)
{
    /** During parsing if SystemQuery is not parsed properly it is added to Expected variants as description check IParser.h.
      * Description string must be statically allocated.
      */
    static std::vector<std::string> type_index_to_type_name = getTypeIndexToTypeName();
    const auto & type_name = type_index_to_type_name[static_cast<UInt64>(type)];
    return type_name.data();
}

bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const
{
    if (type == Type::QUERIES_ALL)
        return true;

    if (type == Type::QUERIES_DEFAULT)
    {
        switch (server_type)
        {
            case Type::TCP:
            case Type::TCP_WITH_PROXY:
            case Type::TCP_SECURE:
            case Type::HTTP:
            case Type::HTTPS:
            case Type::MYSQL:
            case Type::GRPC:
            case Type::POSTGRESQL:
            case Type::PROMETHEUS:
            case Type::INTERSERVER_HTTP:
            case Type::INTERSERVER_HTTPS:
                return true;
            default:
                return false;
        }
    }

    if (type == Type::QUERIES_CUSTOM)
    {
        switch (server_type)
        {
            case Type::CUSTOM:
                return true;
            default:
                return false;
        }
    }

    return type == server_type && custom_name == custom_name_;
}

bool ServerType::shouldStop(const std::string & port_name) const
{
    Type port_type;
    std::string port_custom_name;

    if (port_name == "http_port")
        port_type = Type::HTTP;

    else if (port_name == "https_port")
        port_type = Type::HTTPS;

    else if (port_name == "tcp_port")
        port_type = Type::TCP;

    else if (port_name == "tcp_with_proxy_port")
        port_type = Type::TCP_WITH_PROXY;

    else if (port_name == "tcp_port_secure")
        port_type = Type::TCP_SECURE;

    else if (port_name == "mysql_port")
        port_type = Type::MYSQL;

    else if (port_name == "postgresql_port")
        port_type = Type::POSTGRESQL;

    else if (port_name == "grpc_port")
        port_type = Type::GRPC;

    else if (port_name == "prometheus.port")
        port_type = Type::PROMETHEUS;

    else if (port_name == "interserver_http_port")
        port_type = Type::INTERSERVER_HTTP;

    else if (port_name == "interserver_https_port")
        port_type = Type::INTERSERVER_HTTPS;

    else if (port_name.starts_with("protocols.") && port_name.ends_with(".port"))
    {
        constexpr size_t protocols_size = std::string_view("protocols.").size();
        constexpr size_t port_size = std::string_view(".port").size();

        port_type = Type::CUSTOM;
        port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - port_size);
    }
    else
        port_type = Type::UNKNOWN;

    if (port_type == Type::UNKNOWN)
        return false;

    return shouldStart(port_type, port_custom_name);
}

}
src/Server/ServerType.h (new file, 44 lines)
@ -0,0 +1,44 @@
#pragma once

#include <base/types.h>

namespace DB
{

class ServerType
{
public:

    enum Type
    {
        UNKNOWN,
        TCP,
        TCP_WITH_PROXY,
        TCP_SECURE,
        HTTP,
        HTTPS,
        MYSQL,
        GRPC,
        POSTGRESQL,
        PROMETHEUS,
        CUSTOM,
        INTERSERVER_HTTP,
        INTERSERVER_HTTPS,
        QUERIES_ALL,
        QUERIES_DEFAULT,
        QUERIES_CUSTOM,
        END
    };

    ServerType() = default;
    explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {}

    static const char * serverTypeToString(Type type);

    bool shouldStart(Type server_type, const std::string & custom_name_ = "") const;
    bool shouldStop(const std::string & port_name) const;

    Type type;
    std::string custom_name;
};

}
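The interplay of shouldStart and shouldStop is easiest to see with a small driver. A minimal sketch, assuming the ServerType.h above is available on the include path; the port list and the main function are illustrative only, not part of the diff:

#include <Server/ServerType.h>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    using DB::ServerType;

    /// Suppose the operator asked to stop every "default" protocol listener.
    ServerType target(ServerType::Type::QUERIES_DEFAULT);

    std::vector<std::string> port_names
        = {"http_port", "grpc_port", "protocols.my_proto.port"};

    for (const auto & name : port_names)
        std::cout << name << (target.shouldStop(name) ? ": stop\n" : ": keep\n");
    /// http_port and grpc_port map to default protocol types, so they stop;
    /// protocols.my_proto.port maps to Type::CUSTOM and is kept.
}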
@ -57,7 +57,23 @@ public:
     ~HDFSBuilderWrapper() { hdfsFreeBuilder(hdfs_builder); }

     HDFSBuilderWrapper(const HDFSBuilderWrapper &) = delete;
-    HDFSBuilderWrapper(HDFSBuilderWrapper &&) = default;
     HDFSBuilderWrapper & operator=(const HDFSBuilderWrapper &) = delete;

+    HDFSBuilderWrapper(HDFSBuilderWrapper && other) noexcept
+    {
+        *this = std::move(other);
+    }
+
+    HDFSBuilderWrapper & operator=(HDFSBuilderWrapper && other) noexcept
+    {
+        std::swap(hdfs_builder, other.hdfs_builder);
+        config_stor = std::move(other.config_stor);
+        hadoop_kerberos_keytab = std::move(other.hadoop_kerberos_keytab);
+        hadoop_kerberos_principal = std::move(other.hadoop_kerberos_principal);
+        hadoop_security_kerberos_ticket_cache_path = std::move(other.hadoop_security_kerberos_ticket_cache_path);
+        need_kinit = std::move(other.need_kinit);
+        return *this;
+    }
+
     hdfsBuilder * get() { return hdfs_builder; }
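The explicit move operations above replace the defaulted move constructor with the swap-into-moved-from idiom: the move constructor delegates to move assignment, and the assignment swaps the raw handle so the moved-from wrapper's destructor frees whatever this object held before. A standalone analogue of the same idiom; acquire/release are stand-ins for hdfsNewBuilder/hdfsFreeBuilder and the whole snippet is illustrative:

#include <utility>

struct Handle { int dummy = 0; };                    /// Opaque resource, like hdfsBuilder.
Handle * acquire() { return new Handle; }            /// Stand-in allocator.
void release(Handle * p) noexcept { delete p; }      /// Stand-in deleter; delete nullptr is a no-op.

class HandleWrapper
{
public:
    HandleWrapper() : h(acquire()) {}
    ~HandleWrapper() { release(h); }

    HandleWrapper(const HandleWrapper &) = delete;
    HandleWrapper & operator=(const HandleWrapper &) = delete;

    HandleWrapper(HandleWrapper && other) noexcept : h(nullptr)
    {
        *this = std::move(other);  /// Reuse the assignment below.
    }

    HandleWrapper & operator=(HandleWrapper && other) noexcept
    {
        std::swap(h, other.h);     /// other now owns our old handle...
        return *this;              /// ...and frees it in its destructor.
    }

private:
    Handle * h;
};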
@ -3,6 +3,7 @@
 #if USE_HDFS
 #include <Storages/HDFS/HDFSCommon.h>
 #include <IO/ResourceGuard.h>
 #include <IO/Progress.h>
 #include <Common/Throttler.h>
 #include <Common/safe_cast.h>
 #include <hdfs/hdfs.h>
@ -42,19 +43,23 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
     off_t file_offset = 0;
     off_t read_until_position = 0;
+
+    std::optional<size_t> file_size;

     explicit ReadBufferFromHDFSImpl(
         const std::string & hdfs_uri_,
         const std::string & hdfs_file_path_,
         const Poco::Util::AbstractConfiguration & config_,
         const ReadSettings & read_settings_,
         size_t read_until_position_,
-        bool use_external_buffer_)
+        bool use_external_buffer_,
+        std::optional<size_t> file_size_)
         : BufferWithOwnMemory<SeekableReadBuffer>(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size)
         , hdfs_uri(hdfs_uri_)
         , hdfs_file_path(hdfs_file_path_)
         , builder(createHDFSBuilder(hdfs_uri_, config_))
         , read_settings(read_settings_)
         , read_until_position(read_until_position_)
+        , file_size(file_size_)
     {
         fs = createHDFSFS(builder.get());
         fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
@ -70,12 +75,16 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
         hdfsCloseFile(fs.get(), fin);
     }

-    size_t getFileSize() const
+    size_t getFileSize()
     {
+        if (file_size)
+            return *file_size;
+
         auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str());
         if (!file_info)
             throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", hdfs_file_path);
-        return file_info->mSize;
+        file_size = static_cast<size_t>(file_info->mSize);
+        return *file_size;
     }

     bool nextImpl() override
@ -156,10 +165,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS(
     const Poco::Util::AbstractConfiguration & config_,
     const ReadSettings & read_settings_,
     size_t read_until_position_,
-    bool use_external_buffer_)
+    bool use_external_buffer_,
+    std::optional<size_t> file_size_)
     : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0)
     , impl(std::make_unique<ReadBufferFromHDFSImpl>(
-        hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_))
+        hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_))
     , use_external_buffer(use_external_buffer_)
 {
 }
@ -29,7 +29,8 @@ public:
         const Poco::Util::AbstractConfiguration & config_,
         const ReadSettings & read_settings_,
         size_t read_until_position_ = 0,
-        bool use_external_buffer = false);
+        bool use_external_buffer = false,
+        std::optional<size_t> file_size = std::nullopt);

     ~ReadBufferFromHDFS() override;
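The getFileSize() change above is a lazy-caching pattern: take the size from the caller when it is already known, otherwise fetch it once from storage and remember it, so repeated calls never repeat the metadata round-trip. A library-agnostic sketch of the same idea; fetchSizeFromStorage is a hypothetical stand-in for the hdfsGetPathInfo call:

#include <cstddef>
#include <optional>

class SizedReader
{
public:
    explicit SizedReader(std::optional<size_t> known_size) : file_size(known_size) {}

    size_t getFileSize()
    {
        if (file_size)                       /// Hit: no metadata round-trip.
            return *file_size;
        file_size = fetchSizeFromStorage();  /// Miss: pay the RPC exactly once.
        return *file_size;
    }

private:
    size_t fetchSizeFromStorage() { return 42; }  /// Stand-in for hdfsGetPathInfo.
    std::optional<size_t> file_size;
};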
@ -30,7 +30,6 @@
 #include <Storages/PartitionedSink.h>
 #include <Storages/getVirtualsForStorage.h>
 #include <Storages/checkAndGetLiteralArgument.h>
-#include <Storages/ReadFromStorageProgress.h>

 #include <Formats/ReadSchemaUtils.h>
 #include <Formats/FormatFactory.h>
@ -367,8 +366,13 @@ public:
     {
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
         uris = getPathsList(path_from_uri, uri_without_path, context_);
+        auto file_progress_callback = context_->getFileProgressCallback();
         for (auto & elem : uris)
+        {
             elem.path = uri_without_path + elem.path;
+            if (file_progress_callback && elem.info)
+                file_progress_callback(FileProgress(0, elem.info->size));
+        }
         uris_iter = uris.begin();
     }

@ -389,37 +393,54 @@ private:
     std::vector<StorageHDFS::PathWithInfo>::iterator uris_iter;
 };

-class HDFSSource::URISIterator::Impl
+class HDFSSource::URISIterator::Impl : WithContext
 {
 public:
-    explicit Impl(const std::vector<String> & uris_, ContextPtr context)
+    explicit Impl(const std::vector<String> & uris_, ContextPtr context_)
+        : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback())
     {
-        auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]);
-        HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef());
-        auto fs = createHDFSFS(builder.get());
-        for (const auto & uri : uris_)
+        if (!uris.empty())
         {
-            path_and_uri = getPathFromUriAndUriWithoutPath(uri);
-            if (!hdfsExists(fs.get(), path_and_uri.first.c_str()))
-                uris.push_back(uri);
+            auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]);
+            builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef());
+            fs = createHDFSFS(builder.get());
         }
-        uris_iter = uris.begin();
     }

     StorageHDFS::PathWithInfo next()
     {
-        std::lock_guard lock(mutex);
-        if (uris_iter == uris.end())
-            return {"", {}};
-        auto key = *uris_iter;
-        ++uris_iter;
-        return {key, {}};
+        String uri;
+        hdfsFileInfo * hdfs_info;
+        do
+        {
+            size_t current_index = index.fetch_add(1);
+            if (current_index >= uris.size())
+                return {"", {}};
+
+            uri = uris[current_index];
+            auto path_and_uri = getPathFromUriAndUriWithoutPath(uri);
+            hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str());
+        }
+        /// Skip non-existent files.
+        while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos);
+
+        std::optional<StorageHDFS::PathInfo> info;
+        if (hdfs_info)
+        {
+            info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast<size_t>(hdfs_info->mSize)};
+            if (file_progress_callback)
+                file_progress_callback(FileProgress(0, hdfs_info->mSize));
+        }
+
+        return {uri, info};
     }

 private:
-    std::mutex mutex;
+    std::atomic_size_t index = 0;
     Strings uris;
-    Strings::iterator uris_iter;
+    HDFSBuilderWrapper builder;
+    HDFSFSPtr fs;
+    std::function<void(FileProgress)> file_progress_callback;
 };

 HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri)
@ -456,7 +477,7 @@ HDFSSource::HDFSSource(
     UInt64 max_block_size_,
     std::shared_ptr<IteratorWrapper> file_iterator_,
     ColumnsDescription columns_description_)
-    : ISource(getHeader(block_for_format_, requested_virtual_columns_))
+    : ISource(getHeader(block_for_format_, requested_virtual_columns_), false)
     , WithContext(context_)
     , storage(std::move(storage_))
     , block_for_format(block_for_format_)
@ -482,13 +503,17 @@ bool HDFSSource::initialize()
             continue;

         current_path = path_with_info.path;
+        std::optional<size_t> file_size;
+        if (path_with_info.info)
+            file_size = path_with_info.info->size;
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path);

         auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
         auto impl = std::make_unique<ReadBufferFromHDFS>(
-            uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
+            uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size);
         if (!skip_empty_files || !impl->eof())
         {
             impl->setProgressCallback(getContext());
             const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max;
             read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
             break;
@ -497,14 +522,6 @@ bool HDFSSource::initialize()

-    current_path = path_with_info.path;
-
-    if (path_with_info.info && path_with_info.info->size)
-    {
-        /// Adjust total_rows_approx_accumulated with new total size.
-        if (total_files_size)
-            total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated));
-        total_files_size += path_with_info.info->size;
-    }
-
     input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size);

     QueryPipelineBuilder builder;
@ -542,14 +559,8 @@ Chunk HDFSSource::generate()
         {
             Columns columns = chunk.getColumns();
             UInt64 num_rows = chunk.getNumRows();
-
-            if (num_rows && total_files_size)
-            {
-                size_t chunk_size = input_format->getApproxBytesReadForChunk();
-                if (!chunk_size)
-                    chunk_size = chunk.bytes();
-                updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
-            }
+            size_t chunk_size = input_format->getApproxBytesReadForChunk();
+            progress(num_rows, chunk_size ? chunk_size : chunk.bytes());

             for (const auto & virtual_column : requested_virtual_columns)
             {
@ -169,11 +169,6 @@ private:
     std::unique_ptr<PullingPipelineExecutor> reader;
     String current_path;

-    UInt64 total_rows_approx_max = 0;
-    size_t total_rows_count_times = 0;
-    UInt64 total_rows_approx_accumulated = 0;
-    size_t total_files_size = 0;
-
     /// Recreate ReadBuffer and PullingPipelineExecutor for each file.
     bool initialize();
 };
@ -553,15 +553,15 @@ public:
     /**
       * If the storage requires some complicated work on destroying,
       * then you have two virtual methods:
-      * - flush()
+      * - flushAndPrepareForShutdown()
       * - shutdown()
       *
       * @see shutdown()
-      * @see flush()
+      * @see flushAndPrepareForShutdown()
       */
     void flushAndShutdown()
     {
-        flush();
+        flushAndPrepareForShutdown();
         shutdown();
     }

@ -574,7 +574,7 @@ public:

     /// Called before shutdown() to flush data to underlying storage.
     /// Data in memory needs to be persistent.
-    virtual void flush() {}
+    virtual void flushAndPrepareForShutdown() {}

     /// Asks table to stop executing some action identified by action_type.
     /// If table does not support such type of lock, an empty lock is returned.
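The doc comment above implies a two-phase teardown at the database level: first make every table stop producing data, then run the potentially blocking shutdowns. A schematic sketch of the driver loop this rename enables; IStorageLike and shutdownAllTables are placeholders, the real caller lives elsewhere in the server:

#include <memory>
#include <vector>

struct IStorageLike
{
    virtual void flushAndPrepareForShutdown() = 0;
    virtual void shutdown() = 0;
    virtual ~IStorageLike() = default;
};

/// Phase 1 for every table first (fast), then phase 2 (may block), so no
/// table keeps producing parts while another one is already waiting.
void shutdownAllTables(std::vector<std::shared_ptr<IStorageLike>> & tables)
{
    for (const auto & table : tables)
        table->flushAndPrepareForShutdown();
    for (const auto & table : tables)
        table->shutdown();
}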
@ -203,6 +203,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
             sendPartFromMemory(part, out, send_projections);
         else
             sendPartFromDisk(part, out, client_protocol_version, false, send_projections);
+
+        data.addLastSentPart(part->info);
     }
     catch (const NetException &)
     {
@ -5693,6 +5693,10 @@ bool MergeTreeData::supportsLightweightDelete() const
     auto lock = lockParts();
     for (const auto & part : data_parts_by_info)
     {
+        if (part->getState() == MergeTreeDataPartState::Outdated
+            || part->getState() == MergeTreeDataPartState::Deleting)
+            continue;
+
         if (!part->supportLightweightDeleteMutate())
             return false;
     }
@ -328,7 +328,10 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf
     for (const auto & range : part.ranges)
         part_info->sum_marks += range.end - range.begin;

-    part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names);
+    const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info
+        ? prewhere_info->prewhere_actions->getRequiredColumnsNames()
+        : column_names;
+    part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, columns);

     const auto task_columns = getReadTaskColumns(
         part_reader_info,
@ -369,9 +372,9 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf
     }
     if (prewhere_info)
     {
-        for (const auto & columns : task_columns.pre_columns)
+        for (const auto & cols : task_columns.pre_columns)
         {
-            for (const auto & col : columns)
+            for (const auto & col : cols)
             {
                 const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed;
                 part_info->estimated_memory_usage_for_single_prefetch += std::min<size_t>(col_size, settings.prefetch_buffer_size);
@ -73,8 +73,10 @@ MergeTreeReadPool::MergeTreeReadPool(
     size_t total_marks = 0;
     for (const auto & part : parts_ranges)
     {
-        total_compressed_bytes += getApproxSizeOfPart(
-            *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_);
+        const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info
+            ? prewhere_info->prewhere_actions->getRequiredColumnsNames()
+            : column_names_;
+        total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns);
         total_marks += part.getMarksCount();
     }
@ -119,6 +119,7 @@ struct Settings;
     M(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-identical parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \
     M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
+    M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown the table will wait for the required amount of time for unique parts (which exist only on the current replica) to be fetched by other replicas (0 means disabled).", 0) \
     \
     /** Check delay of replicas settings. */ \
     M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less than this value.", 0) \
@ -576,7 +576,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper
     /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired).
     /// And it's ok if replica became readonly after shutdown.
     /// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread.
-    bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called;
+    bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called;
     if (not_completely_initialized)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug",
             reason, storage.getStorageID().getNameForLogs());
@ -329,7 +329,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica()

 void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown)
 {
-    setReadonly(part_of_full_shutdown);
+    setReadonly(/* on_shutdown = */ part_of_full_shutdown);
     storage.partialShutdown();
 }
@ -339,10 +339,15 @@ void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown)
     /// Stop restarting_thread before stopping other tasks - so that it won't restart them again.
     need_stop = true;
     task->deactivate();
+
+    /// Explicitly set the event, because the restarting thread will not set it again
+    if (part_of_full_shutdown)
+        storage.startup_event.set();
+
     LOG_TRACE(log, "Restarting thread finished");

-    /// Stop other tasks.
-    partialShutdown(part_of_full_shutdown);
+    setReadonly(part_of_full_shutdown);
 }

 void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown)
@ -5,6 +5,7 @@
 #include <base/types.h>
 #include <thread>
 #include <atomic>
+#include <Common/logger_useful.h>

 namespace DB
@ -25,6 +26,7 @@ public:

     void start(bool schedule = true)
     {
+        LOG_TRACE(log, "Starting restarting thread, schedule: {}", schedule);
         if (schedule)
             task->activateAndSchedule();
         else
@ -36,6 +38,7 @@ public:
     void shutdown(bool part_of_full_shutdown);

     void run();
+
 private:
     StorageReplicatedMergeTree & storage;
     String log_name;
Storages/ReadFromStorageProgress.cpp (deleted file)
@ -1,52 +0,0 @@
#include <Storages/ReadFromStorageProgress.h>
#include <Processors/ISource.h>
#include <QueryPipeline/StreamLocalLimits.h>

namespace DB
{

void updateRowsProgressApprox(
    ISource & source,
    size_t num_rows,
    UInt64 chunk_bytes_size,
    UInt64 total_result_size,
    UInt64 & total_rows_approx_accumulated,
    size_t & total_rows_count_times,
    UInt64 & total_rows_approx_max)
{
    if (!total_result_size)
        return;

    if (!num_rows)
        return;

    const auto progress = source.getReadProgress();
    if (progress && !progress->limits.empty())
    {
        for (const auto & limit : progress->limits)
        {
            if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes
                || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes)
                return;
        }
    }

    const auto bytes_per_row = std::ceil(static_cast<double>(chunk_bytes_size) / num_rows);
    size_t total_rows_approx = static_cast<size_t>(std::ceil(static_cast<double>(total_result_size) / bytes_per_row));
    total_rows_approx_accumulated += total_rows_approx;
    ++total_rows_count_times;
    total_rows_approx = total_rows_approx_accumulated / total_rows_count_times;

    /// We need to add diff, because total_rows_approx is incremental value.
    /// It would be more correct to send total_rows_approx as is (not a diff),
    /// but incrementation of total_rows_to_read does not allow that.
    /// A new counter can be introduced for that to be sent to client, but it does not worth it.
    if (total_rows_approx > total_rows_approx_max)
    {
        size_t diff = total_rows_approx - total_rows_approx_max;
        source.addTotalRowsApprox(diff);
        total_rows_approx_max = total_rows_approx;
    }
}

}
Storages/ReadFromStorageProgress.h (deleted file)
@ -1,18 +0,0 @@
#pragma once
#include <Core/Types.h>

namespace DB
{

class ISource;

void updateRowsProgressApprox(
    ISource & source,
    size_t num_rows,
    UInt64 chunk_bytes_size,
    UInt64 total_result_size,
    UInt64 & total_rows_approx_accumulated,
    size_t & total_rows_count_times,
    UInt64 & total_rows_approx_max);

}
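For reference, the deleted helper estimated total rows by deriving bytes-per-row from each chunk, extrapolating a total, and averaging the extrapolations over calls. A compressed restatement of that arithmetic with invented values:

#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    uint64_t total_bytes = 1'000'000;   /// Known total input size.
    uint64_t chunk_bytes = 10'000;      /// Bytes consumed by one chunk.
    uint64_t chunk_rows = 500;          /// Rows produced by that chunk.

    double bytes_per_row = std::ceil(double(chunk_bytes) / chunk_rows);           /// 20
    auto total_rows_approx = uint64_t(std::ceil(total_bytes / bytes_per_row));    /// extrapolated

    std::cout << total_rows_approx << '\n';  /// 50000 estimated rows
}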
@ -31,7 +31,6 @@
 #include <Storages/getVirtualsForStorage.h>
 #include <Storages/StorageURL.h>
 #include <Storages/NamedCollectionsHelpers.h>
-#include <Storages/ReadFromStorageProgress.h>
 #include <Common/parseGlobs.h>
 #include <Disks/ObjectStorages/ObjectStorageIterator.h>
 #include <Disks/IO/AsynchronousBoundedReadBuffer.h>
@ -631,13 +630,13 @@ Pipe StorageAzureBlob::read(
         /// Iterate through disclosed globs and make a source for each file
         iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
             object_storage.get(), configuration.container, configuration.blob_path,
-            query_info.query, virtual_block, local_context, nullptr);
+            query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback());
     }
     else
     {
         iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
             object_storage.get(), configuration.container, configuration.blobs_paths,
-            query_info.query, virtual_block, local_context, nullptr);
+            query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback());
     }

     ColumnsDescription columns_description;
@ -807,7 +806,8 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     ASTPtr query_,
     const Block & virtual_header_,
     ContextPtr context_,
-    RelativePathsWithMetadata * outer_blobs_)
+    RelativePathsWithMetadata * outer_blobs_,
+    std::function<void(FileProgress)> file_progress_callback_)
     : IIterator(context_)
     , object_storage(object_storage_)
     , container(container_)
@ -815,6 +815,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     , query(query_)
     , virtual_header(virtual_header_)
     , outer_blobs(outer_blobs_)
+    , file_progress_callback(file_progress_callback_)
 {

     const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
@ -893,7 +894,8 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
             blobs_with_metadata.clear();
             for (UInt64 idx : idxs.getData())
             {
-                total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed);
+                if (file_progress_callback)
+                    file_progress_callback(FileProgress(0, new_batch[idx].metadata.size_bytes));
                 blobs_with_metadata.emplace_back(std::move(new_batch[idx]));
                 if (outer_blobs)
                     outer_blobs->emplace_back(blobs_with_metadata.back());
@ -905,8 +907,11 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
                 outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end());

             blobs_with_metadata = std::move(new_batch);
-            for (const auto & [_, info] : blobs_with_metadata)
-                total_size.fetch_add(info.size_bytes, std::memory_order_relaxed);
+            if (file_progress_callback)
+            {
+                for (const auto & [_, info] : blobs_with_metadata)
+                    file_progress_callback(FileProgress(0, info.size_bytes));
+            }
         }
     }

@ -916,11 +921,6 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
     return blobs_with_metadata[current_index];
 }

-size_t StorageAzureBlobSource::GlobIterator::getTotalSize() const
-{
-    return total_size.load(std::memory_order_relaxed);
-}
-

 void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_key)
 {
@ -940,17 +940,17 @@ void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_ke
 StorageAzureBlobSource::KeysIterator::KeysIterator(
     AzureObjectStorage * object_storage_,
     const std::string & container_,
-    Strings keys_,
+    const Strings & keys_,
     ASTPtr query_,
     const Block & virtual_header_,
     ContextPtr context_,
-    RelativePathsWithMetadata * outer_blobs_)
+    RelativePathsWithMetadata * outer_blobs,
+    std::function<void(FileProgress)> file_progress_callback)
     : IIterator(context_)
     , object_storage(object_storage_)
     , container(container_)
     , query(query_)
     , virtual_header(virtual_header_)
-    , outer_blobs(outer_blobs_)
 {
     Strings all_keys = keys_;

@ -986,7 +986,8 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
     for (auto && key : all_keys)
     {
         ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
-        total_size += object_metadata.size_bytes;
+        if (file_progress_callback)
+            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
         keys.emplace_back(RelativePathWithMetadata{key, object_metadata});
     }

@ -1003,12 +1004,6 @@ RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next()
     return keys[current_index];
 }

-size_t StorageAzureBlobSource::KeysIterator::getTotalSize() const
-{
-    return total_size.load(std::memory_order_relaxed);
-}
-

 Chunk StorageAzureBlobSource::generate()
 {
     while (true)
@ -1024,17 +1019,10 @@ Chunk StorageAzureBlobSource::generate()
         if (reader->pull(chunk))
         {
             UInt64 num_rows = chunk.getNumRows();
+            size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
+            progress(num_rows, chunk_size ? chunk_size : chunk.bytes());

             const auto & file_path = reader.getPath();
-            if (num_rows && total_objects_size)
-            {
-                size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk();
-                if (!chunk_size)
-                    chunk_size = chunk.bytes();
-                updateRowsProgressApprox(
-                    *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
-            }

             for (const auto & virtual_column : requested_virtual_columns)
             {
                 if (virtual_column.name == "_path")
@ -1059,13 +1047,6 @@ Chunk StorageAzureBlobSource::generate()
         if (!reader)
             break;

-        size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
-        /// Adjust total_rows_approx_accumulated with new total size.
-        if (total_objects_size)
-            total_rows_approx_accumulated = static_cast<size_t>(
-                std::ceil(static_cast<double>(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated));
-        total_objects_size += object_size;
-
         /// Even if the task is finished, the thread may not be freed in the pool yet.
         /// So wait until it is freed before scheduling a new task.
         create_reader_pool.wait();
@ -1096,7 +1077,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
     AzureObjectStorage * object_storage_,
     const String & container_,
     std::shared_ptr<IIterator> file_iterator_)
-    : ISource(getHeader(sample_block_, requested_virtual_columns_))
+    : ISource(getHeader(sample_block_, requested_virtual_columns_), false)
     , WithContext(context_)
     , requested_virtual_columns(requested_virtual_columns_)
     , format(format_)
@ -1114,13 +1095,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
 {
     reader = createReader();
     if (reader)
-    {
-        const auto & read_buf = reader.getReadBuffer();
-        if (read_buf)
-            total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
-
         reader_future = createReaderAsync();
-    }
 }

@ -1162,7 +1137,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader()
     auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     auto current_reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

-    return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)};
+    return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)};
 }

 std::future<StorageAzureBlobSource::ReaderHolder> StorageAzureBlobSource::createReaderAsync()
@ -148,7 +148,6 @@ public:
     IIterator(ContextPtr context_) : WithContext(context_) {}
     virtual ~IIterator() = default;
     virtual RelativePathWithMetadata next() = 0;
-    virtual size_t getTotalSize() const = 0;

     RelativePathWithMetadata operator ()() { return next(); }
 };
@ -163,10 +162,10 @@ public:
         ASTPtr query_,
         const Block & virtual_header_,
         ContextPtr context_,
-        RelativePathsWithMetadata * outer_blobs_);
+        RelativePathsWithMetadata * outer_blobs_,
+        std::function<void(FileProgress)> file_progress_callback_ = {});

     RelativePathWithMetadata next() override;
-    size_t getTotalSize() const override;
     ~GlobIterator() override = default;

 private:
@ -178,7 +177,6 @@ public:
     Block virtual_header;

     size_t index = 0;
-    std::atomic<size_t> total_size = 0;

     RelativePathsWithMetadata blobs_with_metadata;
     RelativePathsWithMetadata * outer_blobs;
@ -191,6 +189,8 @@ public:
     bool is_finished = false;
     bool is_initialized = false;
     std::mutex next_mutex;
+
+    std::function<void(FileProgress)> file_progress_callback;
 };

 class KeysIterator : public IIterator
@ -199,14 +199,14 @@ public:
     KeysIterator(
         AzureObjectStorage * object_storage_,
         const std::string & container_,
-        Strings keys_,
+        const Strings & keys_,
         ASTPtr query_,
         const Block & virtual_header_,
         ContextPtr context_,
-        RelativePathsWithMetadata * outer_blobs_);
+        RelativePathsWithMetadata * outer_blobs,
+        std::function<void(FileProgress)> file_progress_callback = {});

     RelativePathWithMetadata next() override;
-    size_t getTotalSize() const override;
     ~KeysIterator() override = default;

 private:
@ -219,9 +219,6 @@ public:
     Block virtual_header;

     std::atomic<size_t> index = 0;
-    std::atomic<size_t> total_size = 0;

     RelativePathsWithMetadata * outer_blobs;
 };

 StorageAzureBlobSource(
@ -270,7 +267,7 @@ private:
         std::unique_ptr<PullingPipelineExecutor> reader_)
         : path(std::move(path_))
         , read_buf(std::move(read_buf_))
-        , input_format(input_format_)
+        , input_format(std::move(input_format_))
         , pipeline(std::move(pipeline_))
         , reader(std::move(reader_))
     {
@ -301,10 +298,7 @@ private:
     PullingPipelineExecutor * operator->() { return reader.get(); }
     const PullingPipelineExecutor * operator->() const { return reader.get(); }
     const String & getPath() const { return path; }
-
-    const std::unique_ptr<ReadBuffer> & getReadBuffer() const { return read_buf; }
-
-    const std::shared_ptr<IInputFormat> & getFormat() const { return input_format; }
+    const IInputFormat * getInputFormat() const { return input_format.get(); }

 private:
     String path;
@ -322,11 +316,6 @@ private:
     ThreadPoolCallbackRunner<ReaderHolder> create_reader_scheduler;
     std::future<ReaderHolder> reader_future;

-    UInt64 total_rows_approx_max = 0;
-    size_t total_rows_count_times = 0;
-    UInt64 total_rows_approx_accumulated = 0;
-    size_t total_objects_size = 0;
-
     /// Recreate ReadBuffer and Pipeline for each file.
     ReaderHolder createReader();
     std::future<ReaderHolder> createReaderAsync();
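The recurring change across the HDFS, Azure, and S3 sources in this diff swaps per-iterator total_size counters for a FileProgress callback that fires as soon as an object's size is known. The shape of that wiring, reduced to standard C++; FileProgressLike models the real type from IO/Progress.h with two counters, and everything else is illustrative:

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct FileProgressLike
{
    uint64_t read_bytes = 0;
    uint64_t total_bytes_to_read = 0;
};

using ProgressCallback = std::function<void(FileProgressLike)>;

/// Iterator-side: report each discovered object's size up front, so the
/// client can show a meaningful progress bar before any rows are read.
void listObjects(const std::vector<uint64_t> & object_sizes, ProgressCallback cb)
{
    for (uint64_t size : object_sizes)
        if (cb)
            cb(FileProgressLike{0, size});
}

int main()
{
    uint64_t total = 0;
    listObjects({100, 2048, 512}, [&](FileProgressLike p) { total += p.total_bytes_to_read; });
    std::cout << "total bytes to read: " << total << '\n';  /// 2660
}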
@ -682,7 +682,7 @@ void StorageBuffer::startup()
 }


-void StorageBuffer::flush()
+void StorageBuffer::flushAndPrepareForShutdown()
 {
     if (!flush_handle)
         return;
@ -92,7 +92,7 @@ public:

     void startup() override;
     /// Flush all buffers into the subordinate table and stop background thread.
-    void flush() override;
+    void flushAndPrepareForShutdown() override;
     bool optimize(
         const ASTPtr & query,
         const StorageMetadataPtr & metadata_snapshot,
@ -1432,7 +1432,7 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type)
     return {};
 }

-void StorageDistributed::flush()
+void StorageDistributed::flushAndPrepareForShutdown()
 {
     try
     {
@ -135,7 +135,7 @@ public:

     void initializeFromDisk();
     void shutdown() override;
-    void flush() override;
+    void flushAndPrepareForShutdown() override;
     void drop() override;

     bool storesDataOnDisk() const override { return data_volume != nullptr; }
@ -5,7 +5,6 @@
 #include <Storages/PartitionedSink.h>
 #include <Storages/Distributed/DistributedAsyncInsertSource.h>
 #include <Storages/checkAndGetLiteralArgument.h>
-#include <Storages/ReadFromStorageProgress.h>

 #include <Interpreters/Context.h>
 #include <Interpreters/evaluateConstantExpression.h>
@ -389,14 +388,6 @@ std::unique_ptr<ReadBuffer> createReadBuffer(

     std::unique_ptr<ReadBuffer> nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context);

-    /// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
-    if (context->getApplicationType() == Context::ApplicationType::LOCAL
-        || context->getApplicationType() == Context::ApplicationType::CLIENT)
-    {
-        auto & in = static_cast<ReadBufferFromFileBase &>(*nested_buffer);
-        in.setProgressCallback(context);
-    }
-
     int zstd_window_log_max = static_cast<int>(context->getSettingsRef().zstd_window_log_max);
     return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max);
 }
@ -701,7 +692,7 @@ public:
         ColumnsDescription columns_description_,
         const Block & block_for_format_,
         std::unique_ptr<ReadBuffer> read_buf_)
-        : ISource(getBlockForSource(block_for_format_, files_info_))
+        : ISource(getBlockForSource(block_for_format_, files_info_), false)
         , storage(std::move(storage_))
         , storage_snapshot(storage_snapshot_)
         , files_info(std::move(files_info_))
@ -816,12 +807,6 @@ public:
             read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
         }

-        size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0);
-        /// Adjust total_rows_approx_accumulated with new total size.
-        if (total_files_size)
-            total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated));
-        total_files_size += file_size;
-
         const Settings & settings = context->getSettingsRef();
         chassert(!storage->paths.empty());
         const auto max_parsing_threads = std::max<size_t>(settings.max_threads / storage->paths.size(), 1UL);
@ -847,6 +832,10 @@ public:
             if (reader->pull(chunk))
             {
                 UInt64 num_rows = chunk.getNumRows();
+                size_t chunk_size = 0;
+                if (storage->format_name != "Distributed")
+                    chunk_size = input_format->getApproxBytesReadForChunk();
+                progress(num_rows, chunk_size ? chunk_size : chunk.bytes());

                 /// Enrich with virtual columns.
                 if (files_info->need_path_column)
@ -864,14 +853,6 @@ public:
                     chunk.addColumn(column->convertToFullColumnIfConst());
                 }

-                if (num_rows && total_files_size)
-                {
-                    size_t chunk_size = input_format->getApproxBytesReadForChunk();
-                    if (!chunk_size)
-                        chunk_size = chunk.bytes();
-                    updateRowsProgressApprox(
-                        *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
-                }
                 return chunk;
             }

@ -910,12 +891,6 @@ private:
     bool finished_generate = false;

     std::shared_lock<std::shared_timed_mutex> shared_lock;

-    UInt64 total_rows_approx_accumulated = 0;
-    size_t total_rows_count_times = 0;
-    UInt64 total_rows_approx_max = 0;
-
-    size_t total_files_size = 0;
 };
@ -139,7 +139,7 @@ public:

     void startup() override { getNested()->startup(); }
     void shutdown() override { getNested()->shutdown(); }
-    void flush() override { getNested()->flush(); }
+    void flushAndPrepareForShutdown() override { getNested()->flushAndPrepareForShutdown(); }

     ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); }
@ -2,6 +2,7 @@

 #include <cstddef>
+#include <ranges>
 #include <chrono>

 #include <base/hex.h>
 #include <base/interpolate.h>
@ -185,6 +186,7 @@ namespace ErrorCodes
     extern const int CHECKSUM_DOESNT_MATCH;
     extern const int NOT_INITIALIZED;
     extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
+    extern const int TABLE_IS_DROPPED;
 }

 namespace ActionLocks
@ -3921,7 +3923,10 @@ void StorageReplicatedMergeTree::startBeingLeader()
 void StorageReplicatedMergeTree::stopBeingLeader()
 {
     if (!is_leader)
+    {
+        LOG_TRACE(log, "stopBeingLeader called but we are not a leader already");
         return;
+    }

     LOG_INFO(log, "Stopped being leader");
     is_leader = false;
@ -3978,6 +3983,153 @@ String StorageReplicatedMergeTree::findReplicaHavingPart(const String & part_nam
     return {};
 }

+void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info)
+{
+    {
+        std::lock_guard lock(last_sent_parts_mutex);
+        last_sent_parts.emplace_back(info);
+        static constexpr size_t LAST_SENT_PARTS_WINDOW_SIZE = 1000;
+        while (last_sent_parts.size() > LAST_SENT_PARTS_WINDOW_SIZE)
+            last_sent_parts.pop_front();
+    }
+
+    last_sent_parts_cv.notify_all();
+}
+
+void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_)
+{
+    /// Will be true in case of a DROP/DETACH query
+    if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr)
+    {
+        LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query");
+        return;
+    }
+
+    if (!shutdown_called.load())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug");
+
+    auto settings_ptr = getSettings();
+
+    auto wait_ms = settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds();
+    if (wait_ms == 0)
+    {
+        LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero");
+        return;
+    }
+
+    if (shutdown_deadline_ <= std::chrono::system_clock::now())
+    {
+        LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because shutdown_deadline already passed");
+        return;
+    }
+
+    auto zookeeper = getZooKeeperIfTableShutDown();
+
+    auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log);
+    if (unique_parts_set.empty())
+    {
+        LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts");
+        return;
+    }
+    else
+    {
+        LOG_INFO(log, "Will wait for {} unique parts to be fetched", unique_parts_set.size());
+    }
+
+    auto wait_predicate = [&] () -> bool
+    {
+        for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();)
+        {
+            const auto & part = *it;
+
+            bool found = false;
+            for (const auto & sent_part : last_sent_parts | std::views::reverse)
+            {
+                if (sent_part.contains(part))
+                {
+                    LOG_TRACE(log, "Part {} was fetched by some replica", part.getPartNameForLogs());
+                    found = true;
+                    it = unique_parts_set.erase(it);
+                    break;
+                }
+            }
+            if (!found)
+                break;
+        }
+        return unique_parts_set.empty();
+    };
+
+    std::unique_lock lock(last_sent_parts_mutex);
+    if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate))
+        LOG_INFO(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size());
+    else
+        LOG_INFO(log, "Successfully waited for all the parts");
+}
+
+std::set<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_)
+{
+    if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active"))
+    {
+        LOG_INFO(log_, "Our replica is not active, nobody will try to fetch anything");
+        return {};
+    }
+
+    Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas");
+    Strings our_parts;
+    std::vector<ActiveDataPartSet> data_parts_on_replicas;
+    for (const String & replica : replicas)
+    {
+        if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active"))
+        {
+            LOG_TRACE(log_, "Replica {} is not active, skipping", replica);
+            continue;
+        }
+
+        Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / replica / "parts");
+        if (replica == replica_name_)
+        {
+            LOG_TRACE(log_, "Our replica parts collected {}", replica);
+            our_parts = parts;
+        }
+        else
+        {
+            LOG_TRACE(log_, "Fetching parts for replica {}: [{}]", replica, fmt::join(parts, ", "));
+            data_parts_on_replicas.emplace_back(format_version_, parts);
+        }
+    }
+
+    if (data_parts_on_replicas.empty())
+    {
+        LOG_TRACE(log_, "Has no active replicas, will not try to wait for fetch");
+        return {};
+    }
+
+    std::set<MergeTreePartInfo> our_unique_parts;
+    for (const auto & part : our_parts)
+    {
+        bool found = false;
+        for (const auto & active_parts_set : data_parts_on_replicas)
+        {
+            if (!active_parts_set.getContainingPart(part).empty())
+            {
+                found = true;
+                break;
+            }
+        }
+
+        if (!found)
+        {
+            LOG_TRACE(log_, "Part {} not found on other replicas", part);
+            our_unique_parts.emplace(MergeTreePartInfo::fromPartName(part, format_version_));
+        }
+    }
+
+    if (!our_parts.empty() && our_unique_parts.empty())
+        LOG_TRACE(log_, "All parts found on replicas");
+
+    return our_unique_parts;
+}
+
 String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(LogEntry & entry, bool active)
 {
     auto zookeeper = getZooKeeper();
@ -4637,6 +4789,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart(

 void StorageReplicatedMergeTree::startup()
 {
+    LOG_TRACE(log, "Starting up table");
     startOutdatedDataPartsLoadingTask();
     if (attach_thread)
     {
@ -4658,6 +4811,8 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
             since_metadata_err_incr_readonly_metric = true;
             CurrentMetrics::add(CurrentMetrics::ReadonlyReplica);
         }
+
+        LOG_TRACE(log, "No connection to ZooKeeper or no metadata in ZooKeeper, will not startup");
         return;
     }

@ -4692,6 +4847,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)

     if (from_attach_thread)
     {
+        LOG_TRACE(log, "Trying to startup table from right now");
         /// Try activating replica in current thread.
         restarting_thread.run();
     }
@ -4701,9 +4857,18 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
         /// NOTE It does not mean that replication is actually started after receiving this event.
         /// It only means that an attempt to startup replication was made.
         /// Table may still be in readonly mode if this attempt failed for any reason.
-        startup_event.wait();
+        while (!startup_event.tryWait(10 * 1000))
+            LOG_TRACE(log, "Waiting for RestartingThread to startup table");
     }

+    auto lock = std::unique_lock<std::mutex>(flush_and_shutdown_mutex, std::defer_lock);
+    do
+    {
+        if (shutdown_prepared_called.load() || shutdown_called.load())
+            throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped");
+    }
+    while (!lock.try_lock());
+
     /// And this is just a callback
     session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]()
     {
@ -4744,6 +4909,37 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
 }


+void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
+{
+    std::lock_guard lock{flush_and_shutdown_mutex};
+    if (shutdown_prepared_called.exchange(true))
+        return;
+
+    try
+    {
+        auto settings_ptr = getSettings();
+        /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
+        fetcher.blocker.cancelForever();
+        merger_mutator.merges_blocker.cancelForever();
+        parts_mover.moves_blocker.cancelForever();
+        stopBeingLeader();
+
+        if (attach_thread)
+            attach_thread->shutdown();
+
+        restarting_thread.shutdown(/* part_of_full_shutdown */true);
+        /// Explicitly set the event, because the restarting thread will not set it again
+        startup_event.set();
+        shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds()));
+    }
+    catch (...)
+    {
+        /// Don't wait for anything in case of an improper prepare for shutdown
+        shutdown_deadline.emplace(std::chrono::system_clock::now());
+        throw;
+    }
+}
+
 void StorageReplicatedMergeTree::partialShutdown()
 {
     ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown);
@ -4779,21 +4975,28 @@ void StorageReplicatedMergeTree::shutdown()
     if (shutdown_called.exchange(true))
         return;

+    flushAndPrepareForShutdown();
+
+    if (!shutdown_deadline.has_value())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown");
+
+    try
+    {
+        waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline);
+    }
+    catch (const Exception & ex)
+    {
+        if (ex.code() == ErrorCodes::LOGICAL_ERROR)
+            throw;
+
+        tryLogCurrentException(log, __PRETTY_FUNCTION__);
+    }
+
     session_expired_callback_handler.reset();
     stopOutdatedDataPartsLoadingTask();

-    /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
-    fetcher.blocker.cancelForever();
-    merger_mutator.merges_blocker.cancelForever();
-    parts_mover.moves_blocker.cancelForever();
-    mutations_finalizing_task->deactivate();
-    stopBeingLeader();
+    partialShutdown();

-    if (attach_thread)
-        attach_thread->shutdown();
-
-    restarting_thread.shutdown(/* part_of_full_shutdown */true);
     background_operations_assignee.finish();
     part_moves_between_shards_orchestrator.shutdown();

     {
@ -6167,7 +6370,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry(

     const auto & stop_waiting = [&]()
     {
-        bool stop_waiting_itself = waiting_itself && partial_shutdown_called;
+        bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called);
         bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds();
         bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded)
             && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active");
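waitForUniquePartsToBeFetchedByOtherReplicas above is a classic deadline-bounded condition_variable wait paired with a notifying producer (addLastSentPart). Stripped to its skeleton, with the part bookkeeping replaced by a plain counter for illustration:

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <mutex>

std::mutex m;
std::condition_variable cv;
size_t parts_left = 3;

/// Producer side (cf. addLastSentPart): update state under the lock,
/// then wake the waiter.
void onPartSent()
{
    {
        std::lock_guard lock(m);
        if (parts_left > 0)
            --parts_left;
    }
    cv.notify_all();
}

/// Consumer side: block until the predicate holds or the deadline passes.
/// wait_until re-checks the predicate on every wakeup, so spurious wakeups
/// are handled for free; the return value says whether the predicate held.
bool waitForParts(std::chrono::system_clock::time_point deadline)
{
    std::unique_lock lock(m);
    return cv.wait_until(lock, deadline, [] { return parts_left == 0; });
}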
@ -112,8 +112,35 @@ public:
         bool need_check_structure);

     void startup() override;
-    void shutdown() override;
+
+    /// Too many shutdown methods...
+    ///
+    /// Partial shutdown is called if we lose the connection to ZooKeeper.
+    /// Table can also recover after partial shutdown and continue
+    /// to work. This method can be called regularly.
+    void partialShutdown();
+
+    /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown).
+    /// The shutdown process is split into two methods to make it more soft and fast. In database shutdown()
+    /// looks like:
+    /// for (table : tables)
+    ///     table->flushAndPrepareForShutdown()
+    ///
+    /// for (table : tables)
+    ///     table->shutdown()
+    ///
+    /// So we stop producing all the parts first for all tables (fast operation). And after that we can wait in shutdown()
+    /// for other replicas to download parts.
+    ///
+    /// In flushAndPrepareForShutdown we cancel all part-producing operations:
+    /// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will
+    /// call it (defensive programming).
+    void flushAndPrepareForShutdown() override;
+    /// In shutdown we completely terminate the table -- remove the
+    /// is_active node and the interserver handler. Also optionally
+    /// wait until other replicas download some parts from our replica.
+    void shutdown() override;

     ~StorageReplicatedMergeTree() override;

     static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config);
@ -340,6 +367,13 @@ public:
     /// Get a sequential consistent view of current parts.
     ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const;

+    void addLastSentPart(const MergeTreePartInfo & info);
+
+    /// Wait the required number of milliseconds to give other replicas a chance to
+    /// download unique parts from our replica
+    using ShutdownDeadline = std::chrono::time_point<std::chrono::system_clock>;
+    void waitForUniquePartsToBeFetchedByOtherReplicas(ShutdownDeadline shutdown_deadline);
+
 private:
     std::atomic_bool are_restoring_replica {false};

@ -444,9 +478,19 @@ private:
     Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET

     std::atomic<bool> shutdown_called {false};
-    std::atomic<bool> flush_called {false};
+    std::atomic<bool> shutdown_prepared_called {false};
+    std::optional<ShutdownDeadline> shutdown_deadline;
+
+    /// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now
+    mutable std::mutex flush_and_shutdown_mutex;
+
+    mutable std::mutex last_sent_parts_mutex;
+    std::condition_variable last_sent_parts_cv;
+    std::deque<MergeTreePartInfo> last_sent_parts;

     /// Threads.
     ///
     /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue.
     bool queue_update_in_progress = false;
@ -729,6 +773,7 @@ private:
     */
     String findReplicaHavingCoveringPart(LogEntry & entry, bool active);
     String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name);
+    static std::set<MergeTreePartInfo> findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_);

     /** Download the specified part from the specified replica.
      * If `to_detached`, the part is placed in the `detached` directory.
@ -29,7 +29,6 @@
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/StorageURL.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ReadFromStorageProgress.h>

#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
@ -148,7 +147,8 @@ public:
const Block & virtual_header_,
ContextPtr context_,
KeysWithInfo * read_keys_,
const S3Settings::RequestSettings & request_settings_)
const S3Settings::RequestSettings & request_settings_,
std::function<void(FileProgress)> file_progress_callback_)
: WithContext(context_)
, client(client_.clone())
, globbed_uri(globbed_uri_)
@ -158,6 +158,7 @@ public:
, request_settings(request_settings_)
, list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1)
, list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
, file_progress_callback(file_progress_callback_)
{
if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
@ -194,11 +195,6 @@ public:
return nextAssumeLocked();
}

size_t getTotalSize() const
{
return total_size.load(std::memory_order_relaxed);
}

~Impl()
{
list_objects_pool.wait();
@ -312,15 +308,19 @@ private:
buffer.reserve(block.rows());
for (UInt64 idx : idxs.getData())
{
total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed);
if (file_progress_callback)
file_progress_callback(FileProgress(0, temp_buffer[idx].info->size));
buffer.emplace_back(std::move(temp_buffer[idx]));
}
}
else
{
buffer = std::move(temp_buffer);
for (const auto & [_, info] : buffer)
total_size.fetch_add(info->size, std::memory_order_relaxed);
if (file_progress_callback)
{
for (const auto & [_, info] : buffer)
file_progress_callback(FileProgress(0, info->size));
}
}

/// Set iterator only after the whole batch is processed
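
Both branches of the listing code above report each discovered object once, as FileProgress(0, size): zero bytes read, total size known. The consumer can therefore grow its progress denominator incrementally instead of asking the iterator for a grand total up front, which is exactly why getTotalSize() is being removed. A small self-contained sketch of that convention (this FileProgress struct is a stand-in for ClickHouse's real one):

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct FileProgress { size_t read_bytes = 0; size_t total_bytes = 0; };  /// simplified stand-in

int main()
{
    size_t expected_total = 0;
    /// The callback only accumulates totals here; actual read progress arrives later.
    std::function<void(FileProgress)> file_progress_callback = [&](FileProgress p)
    {
        expected_total += p.total_bytes;
    };

    std::vector<size_t> listed_object_sizes{100, 2048, 7};
    for (size_t size : listed_object_sizes)
        if (file_progress_callback)
            file_progress_callback(FileProgress{0, size});  /// 0 bytes read, `size` more expected

    std::cout << "expected total: " << expected_total << '\n';  /// prints 2155
}
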
@ -381,7 +381,7 @@ private:
ThreadPool list_objects_pool;
ThreadPoolCallbackRunner<ListObjectsOutcome> list_objects_scheduler;
std::future<ListObjectsOutcome> outcome_future;
std::atomic<size_t> total_size = 0;
std::function<void(FileProgress)> file_progress_callback;
};

StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
@ -391,8 +391,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
const Block & virtual_header,
ContextPtr context,
KeysWithInfo * read_keys_,
const S3Settings::RequestSettings & request_settings_)
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_))
const S3Settings::RequestSettings & request_settings_,
std::function<void(FileProgress)> file_progress_callback_)
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_, file_progress_callback_))
{
}

@ -401,11 +402,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next()
return pimpl->next();
}

size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const
{
return pimpl->getTotalSize();
}

class StorageS3Source::KeysIterator::Impl : WithContext
{
public:
@ -418,23 +414,26 @@ public:
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
bool need_total_size,
KeysWithInfo * read_keys_)
KeysWithInfo * read_keys_,
std::function<void(FileProgress)> file_progress_callback_)
: WithContext(context_)
, keys(keys_)
, client(client_.clone())
, version_id(version_id_)
, bucket(bucket_)
, request_settings(request_settings_)
, query(query_)
, virtual_header(virtual_header_)
, file_progress_callback(file_progress_callback_)
{
Strings all_keys = keys_;

/// Create a virtual block with one row to construct filter
if (query && virtual_header && !all_keys.empty())
if (query && virtual_header && !keys.empty())
{
/// Append "idx" column as the filter result
virtual_header.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});

auto block = virtual_header.cloneEmpty();
addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0);
addPathToVirtualColumns(block, fs::path(bucket) / keys.front(), 0);

ASTPtr filter_ast;
VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast);
@ -442,8 +441,8 @@ public:
if (filter_ast)
{
block = virtual_header.cloneEmpty();
for (size_t i = 0; i < all_keys.size(); ++i)
addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i);
for (size_t i = 0; i < keys.size(); ++i)
addPathToVirtualColumns(block, fs::path(bucket) / keys[i], i);

VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast);
const auto & idxs = typeid_cast<const ColumnUInt64 &>(*block.getByName("_idx").column);
@ -451,29 +450,17 @@ public:
Strings filtered_keys;
filtered_keys.reserve(block.rows());
for (UInt64 idx : idxs.getData())
filtered_keys.emplace_back(std::move(all_keys[idx]));
filtered_keys.emplace_back(std::move(keys[idx]));

all_keys = std::move(filtered_keys);
keys = std::move(filtered_keys);
}
}

for (auto && key : all_keys)
{
std::optional<S3::ObjectInfo> info;
/// In case all_keys.size() > 1, avoid getting object info now
/// (it will be done anyway eventually, but with delay and in parallel).
/// But progress bar will not work in this case.
if (need_total_size && all_keys.size() == 1)
{
info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_);
total_size += info->size;
}

keys.emplace_back(std::move(key), std::move(info));
}

if (read_keys_)
*read_keys_ = keys;
{
for (const auto & key : keys)
read_keys_->push_back({key, {}});
}
}

KeyWithInfo next()
@ -481,24 +468,27 @@ public:
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= keys.size())
return {};
auto key = keys[current_index];
std::optional<S3::ObjectInfo> info;
if (file_progress_callback)
{
info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings);
file_progress_callback(FileProgress(0, info->size));
}

return keys[current_index];
}

size_t getTotalSize() const
{
return total_size;
return {key, info};
}

private:
KeysWithInfo keys;
Strings keys;
std::atomic_size_t index = 0;

std::unique_ptr<S3::Client> client;
String version_id;
String bucket;
S3Settings::RequestSettings request_settings;
ASTPtr query;
Block virtual_header;

size_t total_size = 0;
std::function<void(FileProgress)> file_progress_callback;
};

StorageS3Source::KeysIterator::KeysIterator(
@ -510,11 +500,11 @@ StorageS3Source::KeysIterator::KeysIterator(
ASTPtr query,
const Block & virtual_header,
ContextPtr context,
bool need_total_size,
KeysWithInfo * read_keys)
KeysWithInfo * read_keys,
std::function<void(FileProgress)> file_progress_callback_)
: pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(
client_, version_id_, keys_, bucket_, request_settings_,
query, virtual_header, context, need_total_size, read_keys))
query, virtual_header, context, read_keys, file_progress_callback_))
{
}

@ -523,11 +513,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next()
return pimpl->next();
}

size_t StorageS3Source::KeysIterator::getTotalSize() const
{
return pimpl->getTotalSize();
}

Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
for (const auto & virtual_column : requested_virtual_columns)
@ -552,7 +537,7 @@ StorageS3Source::StorageS3Source(
const String & version_id_,
std::shared_ptr<IIterator> file_iterator_,
const size_t download_thread_num_)
: ISource(getHeader(sample_block_, requested_virtual_columns_))
: ISource(getHeader(sample_block_, requested_virtual_columns_), false)
, WithContext(context_)
, name(std::move(name_))
, bucket(bucket_)
@ -573,10 +558,7 @@ StorageS3Source::StorageS3Source(
{
reader = createReader();
if (reader)
{
total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
reader_future = createReaderAsync();
}
}

StorageS3Source::ReaderHolder StorageS3Source::createReader()
@ -614,7 +596,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader()
auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto current_reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)};
return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)};
}

std::future<StorageS3Source::ReaderHolder> StorageS3Source::createReaderAsync()
@ -713,17 +695,11 @@ Chunk StorageS3Source::generate()
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());

const auto & file_path = reader.getPath();

if (num_rows && total_objects_size)
{
size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}

for (const auto & virtual_column : requested_virtual_columns)
{
if (virtual_column.name == "_path")
@ -748,13 +724,6 @@ Chunk StorageS3Source::generate()
if (!reader)
break;

size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
/// Adjust total_rows_approx_accumulated with new total size.
if (total_objects_size)
total_rows_approx_accumulated = static_cast<size_t>(
std::ceil(static_cast<double>(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated));
total_objects_size += object_size;

/// Even if the task is finished, the thread may not be freed in the pool yet.
/// So wait until it is freed before scheduling a new task.
create_reader_pool.wait();
@ -1005,8 +974,8 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
bool need_total_size,
KeysWithInfo * read_keys)
KeysWithInfo * read_keys,
std::function<void(FileProgress)> file_progress_callback)
{
if (distributed_processing)
{
@ -1017,14 +986,14 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
/// Iterate through disclosed globs and make a source for each file
return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*configuration.client, configuration.url, query, virtual_block,
local_context, read_keys, configuration.request_settings);
local_context, read_keys, configuration.request_settings, file_progress_callback);
}
else
{
return std::make_shared<StorageS3Source::KeysIterator>(
*configuration.client, configuration.url.version_id, configuration.keys,
configuration.url.bucket, configuration.request_settings, query,
virtual_block, local_context, need_total_size, read_keys);
virtual_block, local_context, read_keys, file_progress_callback);
}
}

@ -1074,7 +1043,7 @@ Pipe StorageS3::read(
}

std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
query_configuration, distributed_processing, local_context, query_info.query, virtual_block);
query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback());

ColumnsDescription columns_description;
Block block_for_format;
@ -1476,7 +1445,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
{
KeysWithInfo read_keys;

auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys);
auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys);

std::optional<ColumnsDescription> columns_from_cache;
size_t prev_read_keys_size = read_keys.size();
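
The rewritten KeysIterator::next() above hands keys to concurrent readers with a single fetch_add on an atomic index, so the hot path needs no mutex; per-object info is fetched lazily, and only when a progress callback actually wants the size. A reduced sketch of that hand-out scheme, stripped of the S3 client and the object-info lookup:

#include <atomic>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// Reduced KeysIterator: concurrent consumers each claim a distinct key
/// via one atomic fetch_add; the key list is immutable after construction.
class KeysIterator
{
public:
    explicit KeysIterator(std::vector<std::string> keys_) : keys(std::move(keys_)) {}

    std::optional<std::string> next()
    {
        size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
        if (current_index >= keys.size())
            return std::nullopt;  /// exhausted: every later caller lands here too
        return keys[current_index];
    }

private:
    std::vector<std::string> keys;
    std::atomic_size_t index{0};
};

int main()
{
    KeysIterator it({"a.csv", "b.csv", "c.csv"});
    while (auto key = it.next())
        std::cout << *key << '\n';
}
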
@ -56,7 +56,6 @@ public:
public:
virtual ~IIterator() = default;
virtual KeyWithInfo next() = 0;
virtual size_t getTotalSize() const = 0;

KeyWithInfo operator ()() { return next(); }
};
@ -71,10 +70,10 @@ public:
const Block & virtual_header,
ContextPtr context,
KeysWithInfo * read_keys_ = nullptr,
const S3Settings::RequestSettings & request_settings_ = {});
const S3Settings::RequestSettings & request_settings_ = {},
std::function<void(FileProgress)> progress_callback_ = {});

KeyWithInfo next() override;
size_t getTotalSize() const override;

private:
class Impl;
@ -94,11 +93,10 @@ public:
ASTPtr query,
const Block & virtual_header,
ContextPtr context,
bool need_total_size = true,
KeysWithInfo * read_keys = nullptr);
KeysWithInfo * read_keys = nullptr,
std::function<void(FileProgress)> progress_callback_ = {});

KeyWithInfo next() override;
size_t getTotalSize() const override;

private:
class Impl;
@ -113,8 +111,6 @@ public:

KeyWithInfo next() override { return {callback(), {}}; }

size_t getTotalSize() const override { return 0; }

private:
ReadTaskCallback callback;
};
@ -168,7 +164,7 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader_)
: path(std::move(path_))
, read_buf(std::move(read_buf_))
, input_format(input_format_)
, input_format(std::move(input_format_))
, pipeline(std::move(pipeline_))
, reader(std::move(reader_))
{
@ -195,15 +191,13 @@ private:
return *this;
}

const std::unique_ptr<ReadBuffer> & getReadBuffer() const { return read_buf; }

const std::shared_ptr<IInputFormat> & getFormat() const { return input_format; }

explicit operator bool() const { return reader != nullptr; }
PullingPipelineExecutor * operator->() { return reader.get(); }
const PullingPipelineExecutor * operator->() const { return reader.get(); }
const String & getPath() const { return path; }

const IInputFormat * getInputFormat() const { return input_format.get(); }

private:
String path;
std::unique_ptr<ReadBuffer> read_buf;
@ -224,11 +218,6 @@ private:
ThreadPoolCallbackRunner<ReaderHolder> create_reader_scheduler;
std::future<ReaderHolder> reader_future;

UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
size_t total_objects_size = 0;

/// Recreate ReadBuffer and Pipeline for each file.
ReaderHolder createReader();
std::future<ReaderHolder> createReaderAsync();
@ -353,8 +342,8 @@ private:
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
bool need_total_size = true,
KeysWithInfo * read_keys = nullptr);
KeysWithInfo * read_keys = nullptr,
std::function<void(FileProgress)> progress_callback = {});

static ColumnsDescription getTableStructureFromDataImpl(
const Configuration & configuration,
@ -88,7 +88,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.url, query, virtual_block, context);
*s3_configuration.client, s3_configuration.url, query, virtual_block, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String { return iterator->next().key; });
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
}
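
getTaskIteratorExtension above erases the glob iterator behind a shared std::function<String()>: every worker that asks for the next task pulls one key, and an empty string marks exhaustion. A self-contained sketch of that type-erased hand-off, with a trivial single-threaded stand-in for the real DisclosedGlobIterator:

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct DisclosedGlobIterator  /// stand-in for the real S3 glob iterator
{
    std::vector<std::string> keys{"data/part1.parquet", "data/part2.parquet"};
    size_t pos = 0;
    std::string next() { return pos < keys.size() ? keys[pos++] : std::string{}; }
};

int main()
{
    auto iterator = std::make_shared<DisclosedGlobIterator>();
    /// Shared, type-erased callback: copies handed to different executors
    /// would all drain the same underlying iterator.
    auto callback = std::make_shared<std::function<std::string()>>(
        [iterator]() mutable -> std::string { return iterator->next(); });

    for (std::string task = (*callback)(); !task.empty(); task = (*callback)())
        std::cout << "dispatch: " << task << '\n';
}
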
@ -79,11 +79,11 @@ public:
nested->shutdown();
}

void flush() override
void flushAndPrepareForShutdown() override
{
std::lock_guard lock{nested_mutex};
if (nested)
nested->flush();
nested->flushAndPrepareForShutdown();
}

void drop() override
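
The rename from flush() to flushAndPrepareForShutdown() makes the two-phase teardown explicit: phase one flushes and may run before a DDLGuard is taken (see the flush_and_shutdown_mutex comment in the StorageReplicatedMergeTree hunk earlier), phase two does the destructive shutdown. A sketch of how such a protocol can be kept idempotent, under those assumed semantics:

#include <iostream>
#include <mutex>

class Storage
{
public:
    /// Phase 1: flush buffered state. Safe to call before any DDLGuard is
    /// acquired, and idempotent, so concurrent or repeated callers are harmless.
    void flushAndPrepareForShutdown()
    {
        std::lock_guard lock(flush_and_shutdown_mutex);
        if (shutdown_prepared)
            return;
        std::cout << "flushing buffered inserts\n";
        shutdown_prepared = true;
    }

    /// Phase 2: destructive teardown; relies on phase 1 having run.
    void shutdown()
    {
        flushAndPrepareForShutdown();  /// no-op if already done
        std::cout << "detaching replica, closing sessions\n";
    }

private:
    std::mutex flush_and_shutdown_mutex;
    bool shutdown_prepared = false;
};

int main()
{
    Storage table;
    table.flushAndPrepareForShutdown();  /// e.g. before acquiring a DDLGuard
    table.shutdown();
}
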
@ -3,7 +3,6 @@
#include <Storages/PartitionedSink.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ReadFromStorageProgress.h>

#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/threadPoolCallbackRunner.h>
@ -235,7 +234,7 @@ StorageURLSource::StorageURLSource(
const HTTPHeaderEntries & headers_,
const URIParams & params,
bool glob_url)
: ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_)
: ISource(getHeader(sample_block, requested_virtual_columns_), false), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_)
{
auto headers = getHeaders(headers_);

@ -271,22 +270,11 @@ StorageURLSource::StorageURLSource(
curr_uri = uri_and_buf.first;
read_buf = std::move(uri_and_buf.second);

size_t file_size = 0;
try
if (auto file_progress_callback = context->getFileProgressCallback())
{
file_size = getFileSizeFromReadBuffer(*read_buf);
}
catch (...)
{
// we simply continue without updating total_size
}

if (file_size)
{
/// Adjust total_rows_approx_accumulated with new total size.
if (total_size)
total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_size + file_size) / total_size * total_rows_approx_accumulated));
total_size += file_size;
size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0);
LOG_DEBUG(&Poco::Logger::get("URL"), "Send file size {}", file_size);
file_progress_callback(FileProgress(0, file_size));
}

// TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams.
@ -332,14 +320,8 @@ Chunk StorageURLSource::generate()
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
if (num_rows && total_size)
{
size_t chunk_size = input_format->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(
*this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
size_t chunk_size = input_format->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());

const String & path{curr_uri.getPath()};

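
The trimmed generate() above replaces the hand-rolled row estimation with a plain progress() call, falling back to the chunk's in-memory byte count whenever the input format cannot report how many bytes it actually consumed. A stand-alone sketch of that fallback (the Chunk struct and progress function are simplified stand-ins for ClickHouse's real ones):

#include <cstddef>
#include <iostream>

struct Chunk  /// stand-in: rows plus decompressed in-memory size
{
    size_t rows;
    size_t memory_bytes;
    size_t bytes() const { return memory_bytes; }
};

void progress(size_t rows, size_t bytes)  /// stand-in for ISource::progress
{
    std::cout << "progress: " << rows << " rows, " << bytes << " bytes\n";
}

int main()
{
    Chunk chunk{1000, 65536};
    size_t chunk_size = 0;  /// what getApproxBytesReadForChunk() returns when unknown
    /// Prefer the real bytes read; otherwise report the in-memory size.
    progress(chunk.rows, chunk_size ? chunk_size : chunk.bytes());
}
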
@ -212,11 +212,6 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader;

Poco::Net::HTTPBasicCredentials credentials;

size_t total_size = 0;
UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
};

class StorageURLSink : public SinkToStorage

@ -81,7 +81,6 @@
02242_join_rocksdb
02267_join_dup_columns_issue36199
02302_s3_file_pruning
02317_distinct_in_order_optimization_explain
02341_global_join_cte
02345_implicit_transaction
02352_grouby_shadows_arg
Some files were not shown because too many files have changed in this diff.