Merge remote-tracking branch 'origin/master' into distinct-in-order-sqlancer-crashes

Igor Nikonov 2023-07-26 22:05:10 +00:00
commit 1ead0d7dac
182 changed files with 2531 additions and 841 deletions

View File

@ -67,6 +67,8 @@ public:
Message(
const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
Message(
std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
/// Creates a Message with the given source, text, priority,
/// source file path and line.
///

View File

@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
}
Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
_source(std::move(source)),
_text(std::move(text)),
_prio(prio),
_tid(0),
_file(file),
_line(line),
_pMap(0),
_fmt_str(fmt_str)
{
init();
}
Message::Message(const Message& msg):
_source(msg._source),
_text(msg._text),

View File

@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
start
shellcheck disable=SC2086 # No quotes because I want to split it into words.
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"

View File

@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
@ -61,6 +62,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start

View File

@ -65,6 +65,40 @@ XML substitution example:
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## Encrypting Configuration {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encryption_codec` with the name of the encryption codec as its value to the element you want to encrypt.
Unlike the attributes `from_zk`, `from_env`, and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.
Example:
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```
To get the encrypted value, the `encrypt_decrypt` example application may be used.
Example:
``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```
``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.

View File

@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC
Syntax:
```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**

View File

@ -414,3 +414,29 @@ Will do sync syscall.
```sql
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
### SYSTEM STOP LISTEN
Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
```sql
SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
- If the `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name, as defined in the protocols section of the server configuration, is stopped.
- If the `QUERIES ALL` modifier is specified, all protocols are stopped.
- If the `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
- If the `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
### SYSTEM START LISTEN
Allows new connections to be established on the specified protocols.
However, if the server on the specified port and protocol was not stopped using the `SYSTEM STOP LISTEN` command, this command will have no effect.
```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```

View File

@ -85,6 +85,40 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
The server tracks changes to configuration files, as well as the files and ZooKeeper nodes that were used for substitutions and overrides, and reloads user and cluster settings on the fly. This means you can modify clusters, users, and their settings without restarting the server.
## Encryption {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encryption_codec` with the name of the encryption codec as its value to the element you want to encrypt.
Unlike the attributes `from_zk`, `from_env`, and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.
Example:
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```
To get the encrypted value, the `encrypt_decrypt` example application may be used.
Example:
``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```
``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
## YAML configuration examples {#example}
An example of a real configuration written in YAML can be found here: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).

View File

@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Syntax:
```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**

View File

@ -65,6 +65,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
@ -80,6 +81,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp

View File

@ -747,6 +747,7 @@ try
std::lock_guard lock(servers_lock);
metrics.reserve(servers_to_start_before_tables.size() + servers.size());
for (const auto & server : servers_to_start_before_tables)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
@ -1456,6 +1457,24 @@ try
access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
});
global_context->setStopServersCallback([&](const ServerType & server_type)
{
stopServers(servers, server_type);
});
global_context->setStartServersCallback([&](const ServerType & server_type)
{
createServers(
config(),
listen_hosts,
listen_try,
server_pool,
async_metrics,
servers,
/* start_servers= */ true,
server_type);
});
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
@ -1997,7 +2016,8 @@ void Server::createServers(
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers)
bool start_servers,
const ServerType & server_type)
{
const Settings & settings = global_context->getSettingsRef();
@ -2011,6 +2031,9 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
continue;
std::vector<std::string> hosts;
if (config.has("protocols." + protocol + ".host"))
hosts.push_back(config.getString("protocols." + protocol + ".host"));
@ -2057,162 +2080,190 @@ void Server::createServers(
for (const auto & listen_host : listen_hosts)
{
/// HTTP
const char * port_name = "http_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
const char * port_name;
return ProtocolServerAdapter(
listen_host,
port_name,
"http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
});
/// HTTPS
port_name = "https_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::HTTP))
{
/// HTTP
port_name = "http_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
});
}
if (server_type.shouldStart(ServerType::Type::HTTPS))
{
/// HTTPS
port_name = "https_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
#endif
});
});
}
/// TCP
port_name = "tcp_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::TCP))
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"native protocol (tcp): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
server_pool,
socket,
new Poco::Net::TCPServerParams));
});
/// TCP
port_name = "tcp_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"native protocol (tcp): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
server_pool,
socket,
new Poco::Net::TCPServerParams));
});
}
/// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
port_name = "tcp_with_proxy_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY))
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"native protocol (tcp) with PROXY: " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
server_pool,
socket,
new Poco::Net::TCPServerParams));
});
/// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
port_name = "tcp_with_proxy_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"native protocol (tcp) with PROXY: " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
server_pool,
socket,
new Poco::Net::TCPServerParams));
});
}
/// TCP with SSL
port_name = "tcp_port_secure";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::TCP_SECURE))
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"secure native protocol (tcp_secure): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
server_pool,
socket,
new Poco::Net::TCPServerParams));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
});
/// TCP with SSL
port_name = "tcp_port_secure";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"secure native protocol (tcp_secure): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
server_pool,
socket,
new Poco::Net::TCPServerParams));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
});
}
port_name = "mysql_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::MYSQL))
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(Poco::Timespan());
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"MySQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
});
port_name = "mysql_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(Poco::Timespan());
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"MySQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
port_name = "postgresql_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::POSTGRESQL))
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(Poco::Timespan());
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"PostgreSQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
});
port_name = "postgresql_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(Poco::Timespan());
socket.setSendTimeout(settings.send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"PostgreSQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
#if USE_GRPC
port_name = "grpc_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::GRPC))
{
Poco::Net::SocketAddress server_address(listen_host, port);
return ProtocolServerAdapter(
listen_host,
port_name,
"gRPC protocol: " + server_address.toString(),
std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
});
port_name = "grpc_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::SocketAddress server_address(listen_host, port);
return ProtocolServerAdapter(
listen_host,
port_name,
"gRPC protocol: " + server_address.toString(),
std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
});
}
#endif
/// Prometheus (if defined and not setup yet with http_port)
port_name = "prometheus.port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::PROMETHEUS))
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"Prometheus: http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
});
/// Prometheus (if defined and not setup yet with http_port)
port_name = "prometheus.port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"Prometheus: http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
});
}
}
}
@ -2223,7 +2274,8 @@ void Server::createInterserverServers(
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers)
bool start_servers,
const ServerType & server_type)
{
const Settings & settings = global_context->getSettingsRef();
@ -2235,52 +2287,97 @@ void Server::createInterserverServers(
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)
{
/// Interserver IO HTTP
const char * port_name = "interserver_http_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"replica communication (interserver): http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(),
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
});
const char * port_name;
port_name = "interserver_https_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP))
{
/// Interserver IO HTTP
port_name = "interserver_http_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"replica communication (interserver): http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(),
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
});
}
if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS))
{
port_name = "interserver_https_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"secure replica communication (interserver): https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(),
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"secure replica communication (interserver): https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(),
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
});
});
}
}
}
void Server::stopServers(
std::vector<ProtocolServerAdapter> & servers,
const ServerType & server_type
) const
{
Poco::Logger * log = &logger();
/// Remove servers once all their connections are closed
auto check_server = [&log](const char prefix[], auto & server)
{
if (!server.isStopping())
return false;
size_t current_connections = server.currentConnections();
LOG_DEBUG(log, "Server {}{}: {} ({} connections)",
server.getDescription(),
prefix,
!current_connections ? "finished" : "waiting",
current_connections);
return !current_connections;
};
std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)"));
for (auto & server : servers)
{
if (!server.isStopping())
{
const std::string server_port_name = server.getPortName();
if (server_type.shouldStop(server_port_name))
server.stop();
}
}
std::erase_if(servers, std::bind_front(check_server, ""));
}
void Server::updateServers(
Poco::Util::AbstractConfiguration & config,
Poco::ThreadPool & server_pool,

View File

@ -3,8 +3,9 @@
#include <Server/IServer.h>
#include <Daemon/BaseDaemon.h>
#include "Server/HTTP/HTTPContext.h"
#include <Server/HTTP/HTTPContext.h>
#include <Server/TCPProtocolStackFactory.h>
#include <Server/ServerType.h>
#include <Poco/Net/HTTPServerParams.h>
/** Server provides three interfaces:
@ -106,7 +107,8 @@ private:
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers = false);
bool start_servers = false,
const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));
void createInterserverServers(
Poco::Util::AbstractConfiguration & config,
@ -115,7 +117,8 @@ private:
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers = false);
bool start_servers = false,
const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));
void updateServers(
Poco::Util::AbstractConfiguration & config,
@ -123,6 +126,11 @@ private:
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
std::vector<ProtocolServerAdapter> & servers_to_start_before_tables);
void stopServers(
std::vector<ProtocolServerAdapter> & servers,
const ServerType & server_type
) const;
};
}

View File

@ -187,6 +187,7 @@ enum class AccessType
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -1,4 +1,26 @@
#include "Allocator.h"
template class Allocator<false>;
template class Allocator<true>;
/** Keep the definition of this constant in the cpp file; otherwise its value
 * is inlined into allocator code, making it impossible to override it
 * in third-party code.
 *
 * Note: extern may seem redundant, but is actually needed due to a bug in GCC.
 * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
 */
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
 * In debug builds, use a small mmap threshold to reproduce more memory
 * stomping bugs. Along with ASLR it will hopefully detect more issues than
 * ASan. The program may fail due to the limit on the number of memory mappings.
 *
 * Not too small, to avoid exhausting memory mappings too quickly.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif
template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;
template class Allocator<true, true>;
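
Since `MMAP_THRESHOLD` is defined as a weak symbol, a third-party application that links ClickHouse as a library can replace it with a strong definition of its own, as the comment above describes. A minimal sketch of such an override (the 256 MiB value and the surrounding application are hypothetical, not part of this commit):

```cpp
// Hypothetical application that links ClickHouse as a library.
#include <cstddef>

// A strong (non-weak) definition wins over the weak one in Allocator.cpp at
// link time, raising the mmap threshold to 256 MiB for the embedded Allocator.
extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20);

int main()
{
    // ... application code using ClickHouse; its Allocator now switches to
    // mmap/mremap only for allocations of 256 MiB and larger ...
    return 0;
}
```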

View File

@ -36,26 +36,51 @@
#include <Common/Allocator_fwd.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/**
 * Many modern allocators (for example, tcmalloc) do not use mremap for
 * realloc, even for large enough chunks of memory, although mremap would
 * increase performance and reduce memory consumption during realloc.
 * To fix this, we do mremap manually if the chunk of memory is large enough.
 * The threshold (64 MB) is chosen quite large, since changing the address
 * space is very slow, especially in the case of a large number of threads. We
 * expect that the set of operations mmap / something to do / mremap can only be
 * performed about 1000 times per second.
 *
 * P.S. This is also required because tcmalloc cannot allocate a chunk of
 * memory greater than 16 GB.
 *
 * P.P.S. Note that the MMAP_THRESHOLD symbol is intentionally made weak. It allows
 * overriding it at link time when using ClickHouse as a library in
 * third-party applications which may already use their own allocator doing mmaps
 * in the implementation of alloc/realloc.
 */
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR;
}
}
/** Previously there was code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
 * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage of the mremap system call for large reallocs.
 * Actually jemalloc had support for mremap, but it was intentionally removed from the codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
 * Our performance tests also show that without manual mmap/mremap/munmap ClickHouse is overall faster by about 1-2% and up to 5-7x for some types of queries.
 * That is why we don't do manual mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
 */
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
@ -63,8 +88,10 @@ namespace ErrorCodes
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_>
template <bool clear_memory_, bool mmap_populate>
class Allocator
{
public:
@ -82,7 +109,7 @@ public:
try
{
checkSize(size);
freeNoTrack(buf);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
}
catch (...)
@ -105,26 +132,49 @@ public:
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (alignment <= MALLOC_MIN_ALIGNMENT)
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
&& alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
{
DB::throwFromErrno(
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On Apple and FreeBSD, a self-implemented mremap is used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocations that require a copy. Assume there's enough memory in the system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else
{
/// Big allocations that require a copy. MemoryTracker is called inside the 'alloc' and 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
@ -142,38 +192,83 @@ protected:
static constexpr bool clear_memory = clear_memory_;
// Freshly mmapped pages are copy-on-write references to a global zero page.
// On the first write, a page fault occurs, and an actual writable page is
// allocated. If we are going to use this memory soon, such as when resizing
// hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
size_t mmap_min_alignment = ::getPageSize();
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
if (size >= MMAP_THRESHOLD)
{
if (alignment > mmap_min_alignment)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size));
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if constexpr (clear_memory)
memset(buf, 0, size);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if constexpr (clear_memory)
memset(buf, 0, size);
}
}
return buf;
}
void freeNoTrack(void * buf)
void freeNoTrack(void * buf, size_t size)
{
::free(buf);
if (size >= MMAP_THRESHOLD)
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
::free(buf);
}
}
void checkSize(size_t size)
@ -182,6 +277,21 @@ private:
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
/// default. This may lead to worse TLB performance.
void * getMmapHint()
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
}
#else
void * getMmapHint()
{
return nullptr;
}
#endif
};
@ -257,5 +367,7 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
/// Prevent implicit template instantiation of Allocator
extern template class Allocator<false>;
extern template class Allocator<true>;
extern template class Allocator<false, false>;
extern template class Allocator<true, false>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;
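
The `mmap_populate` template parameter builds on the standard Linux `MAP_POPULATE` flag described in the comment above. A standalone sketch of the pre-faulting idea, independent of the Allocator class (the 64 MiB size is only illustrative):

```cpp
// Sketch: pre-fault anonymous memory with MAP_POPULATE (Linux-only flag) so a
// later hot loop, e.g. a hash-table resize, is not interrupted by page faults.
#include <sys/mman.h>
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t size = 64 * (1ULL << 20); /// 64 MiB

    /// Without MAP_POPULATE every page starts as a copy-on-write reference to
    /// the zero page and faults in on first write; MAP_POPULATE pre-faults now.
    void * buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
    if (buf == MAP_FAILED)
    {
        perror("mmap");
        return 1;
    }

    /// ... fill the region without page-fault stalls ...

    munmap(buf, size);
    return 0;
}
```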

View File

@ -3,7 +3,7 @@
* This file provides forward declarations for Allocator.
*/
template <bool clear_memory_>
template <bool clear_memory_, bool mmap_populate = false>
class Allocator;
template <typename Base, size_t N = 64, size_t Alignment = 1>

View File

@ -26,6 +26,14 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#if USE_SSL
#include <format>
#include <IO/BufferWithOwnMemory.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <boost/algorithm/hex.hpp>
#endif
#define PREPROCESSED_SUFFIX "-preprocessed"
namespace fs = std::filesystem;
@ -39,6 +47,9 @@ namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int CANNOT_LOAD_CONFIG;
#if USE_SSL
extern const int BAD_ARGUMENTS;
#endif
}
/// For cutting preprocessed path to this base
@ -177,6 +188,72 @@ static void mergeAttributes(Element & config_element, Element & with_element)
with_element_attributes->release();
}
#if USE_SSL
std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
{
EncryptionMethod method = getEncryptionMethod(codec_name);
CompressionCodecEncrypted codec(method);
Memory<> memory;
memory.resize(codec.getCompressedReserveSize(static_cast<UInt32>(value.size())));
auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory.data());
auto encrypted_value = std::string(memory.data(), bytes_written);
std::string hex_value;
boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
return hex_value;
}
std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
{
EncryptionMethod method = getEncryptionMethod(codec_name);
CompressionCodecEncrypted codec(method);
Memory<> memory;
std::string encrypted_value;
try
{
boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
}
memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()));
codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
std::string decrypted_value = std::string(memory.data(), memory.size());
return decrypted_value;
}
void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
{
for (Node * node = config_root->firstChild(); node; node = node->nextSibling())
{
if (node->nodeType() == Node::ELEMENT_NODE)
{
Element & element = dynamic_cast<Element &>(*node);
if (element.hasAttribute("encryption_codec"))
{
const NodeListPtr children = element.childNodes();
if (children->length() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName());
Node * text_node = node->firstChild();
if (text_node->nodeType() != Node::TEXT_NODE)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());
auto encryption_codec = element.getAttribute("encryption_codec");
text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue()));
}
decryptRecursive(node);
}
}
}
#endif
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{
const NodeListPtr with_nodes = with_root->childNodes();
@ -694,7 +771,19 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
}
void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
#if USE_SSL
void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config)
{
CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
decryptRecursive(config_root);
loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml);
}
#endif
void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir)
{
try
{
@ -749,6 +838,12 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
{
LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
}
#if USE_SSL
std::string preprocessed_file_name = fs::path(preprocessed_path).filename();
if (preprocessed_file_name == "config.xml" || preprocessed_file_name == std::format("config{}.xml", PREPROCESSED_SUFFIX))
decryptEncryptedElements(loaded_config);
#endif
}
void ConfigProcessor::setConfigPath(const std::string & config_path)

View File

@ -97,7 +97,7 @@ public:
/// Save preprocessed config to specified directory.
/// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/
void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir);
void savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir);
/// Set path of main config.xml. It will be cut from all configs placed to preprocessed_configs/
static void setConfigPath(const std::string & config_path);
@ -109,6 +109,14 @@ public:
/// Is the file named as result of config preprocessing, not as original files.
static bool isPreprocessedFile(const std::string & config_path);
#if USE_SSL
/// Encrypt text value
static std::string encryptValue(const std::string & codec_name, const std::string & value);
/// Decrypt value
static std::string decryptValue(const std::string & codec_name, const std::string & value);
#endif
static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"};
private:
@ -127,6 +135,13 @@ private:
using NodePtr = Poco::AutoPtr<Poco::XML::Node>;
#if USE_SSL
void decryptRecursive(Poco::XML::Node * config_root);
/// Decrypt elements in config with specified encryption attributes
void decryptEncryptedElements(LoadedConfig & loaded_config);
#endif
void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);
void merge(XMLDocumentPtr config, XMLDocumentPtr with);

View File

@ -110,9 +110,23 @@ namespace
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_node.appendChild(xml_key);
processNode(value_node, *xml_key);
if (key == "#text" && value_node.IsScalar())
{
for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling())
if (child_node->nodeType() == Node::TEXT_NODE)
throw Exception(ErrorCodes::CANNOT_PARSE_YAML,
"YAMLParser has encountered node with several text nodes "
"and cannot continue parsing of the file");
std::string value = value_node.as<std::string>();
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
parent_xml_node.appendChild(xml_value);
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_node.appendChild(xml_key);
processNode(value_node, *xml_key);
}
}
}
break;

View File

@ -173,6 +173,8 @@
M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \

View File

@ -8,7 +8,7 @@
* table, so it makes sense to pre-fault the pages so that page faults don't
* interrupt the resize loop. Set the allocator parameter accordingly.
*/
using HashTableAllocator = Allocator<true /* clear_memory */>;
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

View File

@ -113,13 +113,19 @@ public:
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ULL >> s;
if constexpr (std::endian::native == std::endian::little)
n[0] &= -1ULL >> s;
else
n[0] &= -1ULL << s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[0] >>= s;
else
n[0] <<= s;
}
auto res = hash(k8);
auto buck = getBucketFromHash(res);
@ -131,7 +137,10 @@ public:
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[1] >>= s;
else
n[1] <<= s;
auto res = hash(k16);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
@ -142,7 +151,10 @@ public:
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[2] >>= s;
else
n[2] <<= s;
auto res = hash(k24);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
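
The masking above clears the bytes of an 8-byte load that lie past the end of a short key; which end holds the garbage depends on byte order. A standalone sketch of the idea (the function name and the in-bounds-read assumption are illustrative, not taken from the hash table code):

```cpp
// Sketch: load up to 8 bytes of a short key and clear the bytes past its end.
// On little-endian machines the excess bytes are the high-order ones, on
// big-endian machines the low-order ones, hence the different mask direction.
#include <bit>
#include <cstdint>
#include <cstring>
#include <string_view>

uint64_t loadShortKey(std::string_view key) /// assumes key.size() <= 8 and that reading 8 bytes is safe
{
    uint64_t word = 0;
    memcpy(&word, key.data(), 8);            /// may pick up trailing garbage bytes
    const uint64_t s = (8 - key.size()) * 8; /// number of garbage bits
    if (s == 0)
        return word;
    if constexpr (std::endian::native == std::endian::little)
        word &= -1ULL >> s;                  /// garbage sits in the high bits
    else
        word &= -1ULL << s;                  /// garbage sits in the low bits
    return word;
}
```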

View File

@ -27,15 +27,9 @@ struct Interval
};
template <typename IntervalStorageType>
bool operator<(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
auto operator<=>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right);
}
template <typename IntervalStorageType>
bool operator<=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right);
return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right);
}
template <typename IntervalStorageType>
@ -44,24 +38,6 @@ bool operator==(const Interval<IntervalStorageType> & lhs, const Interval<Interv
return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
}
template <typename IntervalStorageType>
bool operator!=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right);
}
template <typename IntervalStorageType>
bool operator>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right);
}
template <typename IntervalStorageType>
bool operator>=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right);
}
struct IntervalTreeVoidValue
{
};
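
For context on why the separate `!=`, `>`, `>=`, and `<=` overloads can be removed: in C++20 the compiler rewrites relational expressions in terms of `operator<=>` and `!=` in terms of `operator==`. A minimal, self-contained sketch of the same pattern (the `Pair` type is illustrative, not part of the interval tree):

```cpp
#include <compare>
#include <tuple>

struct Pair
{
    int left;
    int right;
};

/// One three-way comparison replaces <, <=, > and >=.
auto operator<=>(const Pair & lhs, const Pair & rhs)
{
    return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right);
}

/// operator== still has to be provided; it also gives != for free.
bool operator==(const Pair & lhs, const Pair & rhs)
{
    return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
}

bool demo(const Pair & a, const Pair & b)
{
    /// All of these are rewritten by the compiler using the two operators above.
    return a < b || a >= b || a != b;
}
```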

View File

@ -43,6 +43,17 @@ struct PreformattedMessage
operator const std::string & () const { return text; }
operator std::string () && { return std::move(text); }
operator fmt::format_string<> () const { UNREACHABLE(); }
void apply(std::string & out_text, std::string_view & out_format_string) const &
{
out_text = text;
out_format_string = format_string;
}
void apply(std::string & out_text, std::string_view & out_format_string) &&
{
out_text = std::move(text);
out_format_string = format_string;
}
};
template <typename... Args>
@ -99,10 +110,33 @@ template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x
}
}
/// Constexpr ifs are not like ifdefs: the compiler still checks that the code in the unused branch can be compiled.
/// This template is useful to avoid compilation failures when the condition of some "constexpr if" is false
template<bool enable> struct ConstexprIfsAreNotIfdefs
{
template <typename T> constexpr static std::string_view getStaticFormatString(T &&) { return {}; }
template <typename T> static PreformattedMessage getPreformatted(T &&) { return {}; }
};
template<> struct ConstexprIfsAreNotIfdefs<true>
{
template <typename T> consteval static std::string_view getStaticFormatString(T && x)
{
/// See tryGetStaticFormatString(...)
static_assert(!std::is_same_v<std::string, std::decay_t<T>>);
static_assert(std::is_nothrow_convertible<T, const char * const>::value);
static_assert(!std::is_pointer<T>::value);
return std::string_view(x);
}
template <typename T> static T && getPreformatted(T && x) { return std::forward<T>(x); }
};
template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); }
template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); }
/// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
template <typename T, typename... Ts> constexpr auto firstArg(fmt::basic_runtime<T> && data, Ts &&...) { return data.str.data(); }
template <typename T, typename... Ts> constexpr auto firstArg(const fmt::basic_runtime<T> & data, Ts &&...) { return data.str.data(); }
consteval ssize_t formatStringCountArgsNum(const char * const str, size_t len)
{
@ -142,26 +176,19 @@ consteval void formatStringCheckArgsNumImpl(std::string_view str, size_t nargs)
functionThatFailsCompilationOfConstevalFunctions("unexpected number of arguments in a format string");
}
template <typename... Args>
struct CheckArgsNumHelperImpl
template<typename T>
consteval void formatStringCheckArgsNum(T && str, size_t nargs)
{
template<typename T>
consteval CheckArgsNumHelperImpl(T && str)
{
formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), sizeof...(Args));
}
/// No checks for fmt::runtime and PreformattedMessage
template<typename T> CheckArgsNumHelperImpl(fmt::basic_runtime<T> &&) {}
template<> CheckArgsNumHelperImpl(PreformattedMessage &) {}
template<> CheckArgsNumHelperImpl(const PreformattedMessage &) {}
template<> CheckArgsNumHelperImpl(PreformattedMessage &&) {}
};
template <typename... Args> using CheckArgsNumHelper = CheckArgsNumHelperImpl<std::type_identity_t<Args>...>;
template <typename... Args> void formatStringCheckArgsNum(CheckArgsNumHelper<Args...>, Args &&...) {}
formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), nargs);
}
template<typename T> inline void formatStringCheckArgsNum(fmt::basic_runtime<T> &&, size_t) {}
template<> inline void formatStringCheckArgsNum(PreformattedMessage &, size_t) {}
template<> inline void formatStringCheckArgsNum(const PreformattedMessage &, size_t) {}
template<> inline void formatStringCheckArgsNum(PreformattedMessage &&, size_t) {}
template<typename T> struct FormatStringTypeInfo{ static constexpr bool is_static = true; static constexpr bool has_format = true; };
template<typename T> struct FormatStringTypeInfo<fmt::basic_runtime<T>> { static constexpr bool is_static = false; static constexpr bool has_format = false; };
template<> struct FormatStringTypeInfo<PreformattedMessage> { static constexpr bool is_static = false; static constexpr bool has_format = true; };
/// This wrapper helps to avoid too frequent and noisy log messages.
/// For each pair (logger_name, format_string) it remembers when such a message was logged the last time.

View File

@ -101,9 +101,6 @@ void ProgressIndication::writeFinalProgress()
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
auto peak_memory_usage = getMemoryUsage().peak;
if (peak_memory_usage >= 0)
std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)

View File

@ -82,3 +82,8 @@ endif()
clickhouse_add_executable (interval_tree interval_tree.cpp)
target_link_libraries (interval_tree PRIVATE dbms)
if (ENABLE_SSL)
clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
target_link_libraries (encrypt_decrypt PRIVATE dbms)
endif()

View File

@ -0,0 +1,61 @@
#include <Common/Config/ConfigProcessor.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <iostream>
/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV.
* Keys for codecs are loaded from the <encryption_codecs> section of the configuration file.
*
* How to use:
* ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt
*/
int main(int argc, char ** argv)
{
try
{
if (argc != 5)
{
std::cerr << "Usage:" << std::endl
<< " " << argv[0] << " path action codec value" << std::endl
<< "path: path to configuration file." << std::endl
<< "action: -e for encryption and -d for decryption." << std::endl
<< "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." << std::endl << std::endl
<< "Example:" << std::endl
<< " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt";
return 3;
}
std::string action = argv[2];
std::string codec_name = argv[3];
std::string value = argv[4];
DB::ConfigProcessor processor(argv[1], false, true);
auto loaded_config = processor.loadConfig();
DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
if (action == "-e")
std::cout << processor.encryptValue(codec_name, value) << std::endl;
else if (action == "-d")
std::cout << processor.decryptValue(codec_name, value) << std::endl;
else
std::cerr << "Unknown action: " << action << std::endl;
}
catch (Poco::Exception & e)
{
std::cerr << "Exception: " << e.displayText() << std::endl;
return 1;
}
catch (std::exception & e)
{
std::cerr << "std::exception: " << e.what() << std::endl;
return 3;
}
catch (...)
{
std::cerr << "Some exception" << std::endl;
return 2;
}
return 0;
}

View File

@ -1,7 +1,7 @@
#pragma once
/// Macros for convenient usage of Poco logger.
#include <unistd.h>
#include <fmt/format.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
@ -28,33 +28,86 @@ namespace
#define LOG_IMPL_FIRST_ARG(X, ...) X
/// Copy-paste from contrib/libpq/include/c.h
/// There's no easy way to count the number of arguments without evaluating these arguments...
#define CH_VA_ARGS_NARGS(...) \
CH_VA_ARGS_NARGS_(__VA_ARGS__, \
63,62,61,60, \
59,58,57,56,55,54,53,52,51,50, \
49,48,47,46,45,44,43,42,41,40, \
39,38,37,36,35,34,33,32,31,30, \
29,28,27,26,25,24,23,22,21,20, \
19,18,17,16,15,14,13,12,11,10, \
9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define CH_VA_ARGS_NARGS_( \
_01,_02,_03,_04,_05,_06,_07,_08,_09,_10, \
_11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
_21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
_31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
_41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
_51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
_61,_62,_63, N, ...) \
(N)
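/// A minimal illustration (these static_asserts are not part of the original header, they just show the technique):
/// the variadic arguments shift the descending number list to the right, so whatever number lands in the
/// N parameter equals the count of the arguments passed in.
///     static_assert(CH_VA_ARGS_NARGS("only a format string") == 1);
///     static_assert(CH_VA_ARGS_NARGS("x = {}, y = {}", 1, 2) == 3);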
#define LINE_NUM_AS_STRING_IMPL2(x) #x
#define LINE_NUM_AS_STRING_IMPL(x) LINE_NUM_AS_STRING_IMPL2(x)
#define LINE_NUM_AS_STRING LINE_NUM_AS_STRING_IMPL(__LINE__)
#define MESSAGE_FOR_EXCEPTION_ON_LOGGING "Failed to write a log message: " __FILE__ ":" LINE_NUM_AS_STRING "\n"
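/// For illustration (hypothetical call site): if a LOG_* macro is expanded at line 31 of Foo.cpp,
/// this expands via the two-level stringification above into the single string literal
/// "Failed to write a log message: Foo.cpp:31\n", so its sizeof() is known at compile time
/// and it can be emitted with one write() call even when logging itself throws.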
/// Logs a message to a specified logger with that level.
/// If more than one argument is provided,
/// the first argument is interpreted as a template with {}-substitutions
/// and the latter arguments are treated as values to substitute.
/// If only one argument is provided, it is treated as a message without substitutions.
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
auto _logger = ::getLogger(logger); \
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
(DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
if (_is_clients_log || _logger->is((PRIORITY))) \
{ \
std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \
formatStringCheckArgsNum(__VA_ARGS__); \
if (auto _channel = _logger->getChannel()) \
{ \
std::string file_function; \
file_function += __FILE__; \
file_function += "; "; \
file_function += __PRETTY_FUNCTION__; \
Poco::Message poco_message(_logger->name(), formatted_message, \
(PRIORITY), file_function.c_str(), __LINE__, tryGetStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__))); \
_channel->log(poco_message); \
} \
ProfileEvents::incrementForLogMessage(PRIORITY); \
} \
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
auto _logger = ::getLogger(logger); \
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
(DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
if (!_is_clients_log && !_logger->is((PRIORITY))) \
break; \
\
try \
{ \
ProfileEvents::incrementForLogMessage(PRIORITY); \
auto _channel = _logger->getChannel(); \
if (!_channel) \
break; \
\
constexpr size_t _nargs = CH_VA_ARGS_NARGS(__VA_ARGS__); \
using LogTypeInfo = FormatStringTypeInfo<std::decay_t<decltype(LOG_IMPL_FIRST_ARG(__VA_ARGS__))>>; \
\
std::string_view _format_string; \
std::string _formatted_message; \
\
if constexpr (LogTypeInfo::is_static) \
{ \
formatStringCheckArgsNum(LOG_IMPL_FIRST_ARG(__VA_ARGS__), _nargs - 1); \
_format_string = ConstexprIfsAreNotIfdefs<LogTypeInfo::is_static>::getStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__)); \
} \
\
constexpr bool is_preformatted_message = !LogTypeInfo::is_static && LogTypeInfo::has_format; \
if constexpr (is_preformatted_message) \
{ \
static_assert(_nargs == 1 || !is_preformatted_message); \
ConstexprIfsAreNotIfdefs<is_preformatted_message>::getPreformatted(LOG_IMPL_FIRST_ARG(__VA_ARGS__)).apply(_formatted_message, _format_string); \
} \
else \
{ \
_formatted_message = _nargs == 1 ? firstArg(__VA_ARGS__) : fmt::format(__VA_ARGS__); \
} \
\
std::string _file_function = __FILE__ "; "; \
_file_function += __PRETTY_FUNCTION__; \
Poco::Message _poco_message(_logger->name(), std::move(_formatted_message), \
(PRIORITY), _file_function.c_str(), __LINE__, _format_string); \
_channel->log(_poco_message); \
} \
catch (...) \
{ \
::write(STDERR_FILENO, static_cast<const void *>(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \
} \
} while (false)
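/// Usage sketch (assuming the LOG_TRACE/LOG_DEBUG/... wrappers that forward to LOG_IMPL further down in this header):
///     LOG_TRACE(log, "Loaded {} parts in {} ms", parts.size(), elapsed_ms);      /// static format string, argument count checked at compile time
///     LOG_TRACE(log, fmt::runtime(dynamic_text));                                /// runtime string, no compile-time checks
///     LOG_TRACE(log, PreformattedMessage::create("connections: {}", count));     /// preformatted message, format string preserved for the text log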

View File

@ -1,6 +1,7 @@
#include <string>
#include <vector>
#include <Common/logger_useful.h>
#include <Common/thread_local_rng.h>
#include <gtest/gtest.h>
#include <Poco/Logger.h>
@ -50,3 +51,55 @@ TEST(Logger, TestLog)
}
}
static size_t global_counter = 0;
static std::string getLogMessage()
{
++global_counter;
return "test1 " + std::to_string(thread_local_rng());
}
static size_t getLogMessageParam()
{
++global_counter;
return thread_local_rng();
}
static PreformattedMessage getPreformatted()
{
++global_counter;
return PreformattedMessage::create("test3 {}", thread_local_rng());
}
static size_t getLogMessageParamOrThrow()
{
size_t x = thread_local_rng();
if (x % 1000 == 0)
return x;
throw Poco::Exception("error", 42);
}
TEST(Logger, SideEffects)
{
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
auto my_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(oss));
auto * log = &Poco::Logger::create("Logger", my_channel.get());
log->setLevel("trace");
/// Ensure that parameters are evaluated only once
global_counter = 0;
LOG_TRACE(log, fmt::runtime(getLogMessage()));
EXPECT_EQ(global_counter, 1);
LOG_TRACE(log, "test2 {}", getLogMessageParam());
EXPECT_EQ(global_counter, 2);
LOG_TRACE(log, getPreformatted());
EXPECT_EQ(global_counter, 3);
auto var = PreformattedMessage::create("test4 {}", thread_local_rng());
LOG_TRACE(log, var);
EXPECT_EQ(var.text.starts_with("test4 "), true);
EXPECT_EQ(var.format_string, "test4 {}");
LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow());
}

View File

@ -28,6 +28,17 @@ namespace DB
namespace ErrorCodes
{
extern const int OPENSSL_ERROR;
extern const int BAD_ARGUMENTS;
}
EncryptionMethod getEncryptionMethod(const std::string & name)
{
if (name == "AES_128_GCM_SIV")
return AES_128_GCM_SIV;
else if (name == "AES_256_GCM_SIV")
return AES_256_GCM_SIV;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", name);
}
namespace
@ -63,7 +74,7 @@ uint8_t getMethodCode(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -79,7 +90,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
@ -104,7 +114,7 @@ UInt64 methodKeySize(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -129,7 +139,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -205,7 +215,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -578,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method,
if (current_params->keys_storage[method].contains(key_id))
key = current_params->keys_storage[method].at(key_id);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption codec", key_id, getMethodName(method));
return key;
}

View File

@ -18,6 +18,9 @@ enum EncryptionMethod
MAX_ENCRYPTION_METHOD
};
/// Get method for string name. Throw exception for wrong name.
EncryptionMethod getEncryptionMethod(const std::string & name);
/** This codec encrypts and decrypts blocks with AES-128 in
* GCM-SIV mode (RFC-8452), which is the only cipher currently
* supported. Although it is implemented as a compression codec

View File

@ -659,7 +659,8 @@ class IColumn;
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
\
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
@ -673,6 +674,7 @@ class IColumn;
M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \
\
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \

View File

@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},

View File

@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown()
for (const auto & kv : tables_snapshot)
{
kv.second->flush();
kv.second->flushAndPrepareForShutdown();
}
for (const auto & kv : tables_snapshot)

View File

@ -9,7 +9,8 @@
#include <Common/assert_cast.h>
#include <base/sleep.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
namespace ProfileEvents
{
@ -40,11 +41,17 @@ enum class FunctionSleepVariant
template <FunctionSleepVariant variant>
class FunctionSleep : public IFunction
{
private:
UInt64 max_microseconds;
public:
static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? "sleep" : "sleepEachRow";
static FunctionPtr create(ContextPtr)
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionSleep<variant>>(context->getSettingsRef().function_sleep_max_microseconds_per_block);
}
FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_)
{
return std::make_shared<FunctionSleep<variant>>();
}
/// Get the name of the function.
@ -105,13 +112,19 @@ public:
if (size > 0)
{
/// While sleeping, the query cannot be cancelled. To keep queries cancellable, we limit the sleep time.
if (seconds > 3.0) /// The choice is arbitrary
throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));
if (max_microseconds && seconds * 1e6 > max_microseconds)
throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds);
if (!dry_run)
{
UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
if (max_microseconds && microseconds > max_microseconds)
throw Exception(ErrorCodes::TOO_SLOW,
"The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
max_microseconds, microseconds, size);
sleepForMicroseconds(microseconds);
ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);

View File

@ -40,9 +40,10 @@ struct ReadProgress
UInt64 read_rows = 0;
UInt64 read_bytes = 0;
UInt64 total_rows_to_read = 0;
UInt64 total_bytes_to_read = 0;
ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
};
struct WriteProgress
@ -98,8 +99,8 @@ struct Progress
Progress() = default;
Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
explicit Progress(ReadProgress read_progress)
: read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {}

View File

@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context)
setProfileCallback([file_progress_callback](const ProfileInfo & progress)
{
file_progress_callback(FileProgress(progress.bytes_read, 0));
file_progress_callback(FileProgress(progress.bytes_read));
});
}

View File

@ -2020,7 +2020,8 @@ template <typename Method, bool use_compiled_functions, bool return_single_block
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE
Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const
{
const size_t max_block_size = params.max_block_size;
/// +1 is for nullKeyData; if `data` doesn't have it, that's not a problem, just memory for one extra row will be preallocated
const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
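/// E.g. with params.max_block_size = 65536: 1000 keys give max_block_size = 1001, 10M keys give 65537
/// per block when multiple blocks are returned, or 10M + 1 when a single block is requested.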
const bool final = true;
ConvertToBlockRes<return_single_block> res;
@ -2097,7 +2098,8 @@ template <bool return_single_block, typename Method, typename Table>
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE
Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const
{
const size_t max_block_size = params.max_block_size;
/// +1 is for nullKeyData; if `data` doesn't have it, that's not a problem, just memory for one extra row will be preallocated
const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
const bool final = false;
ConvertToBlockRes<return_single_block> res;

View File

@ -21,6 +21,7 @@
#include <Core/BackgroundSchedulePool.h>
#include <Formats/FormatFactory.h>
#include <Databases/IDatabase.h>
#include <Server/ServerType.h>
#include <Storages/IStorage.h>
#include <Storages/MarkCache.h>
#include <Storages/MergeTree/MergeList.h>
@ -357,6 +358,9 @@ struct ContextSharedPart : boost::noncopyable
Context::ConfigReloadCallback config_reload_callback;
Context::StartStopServersCallback start_servers_callback;
Context::StartStopServersCallback stop_servers_callback;
bool is_server_completely_started = false;
#if USE_ROCKSDB
@ -3688,6 +3692,36 @@ void Context::reloadConfig() const
shared->config_reload_callback();
}
void Context::setStartServersCallback(StartStopServersCallback && callback)
{
/// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
shared->start_servers_callback = std::move(callback);
}
void Context::setStopServersCallback(StartStopServersCallback && callback)
{
/// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
shared->stop_servers_callback = std::move(callback);
}
void Context::startServers(const ServerType & server_type) const
{
/// Use mutex if callback may be changed after startup.
if (!shared->start_servers_callback)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set.");
shared->start_servers_callback(server_type);
}
void Context::stopServers(const ServerType & server_type) const
{
/// Use mutex if callback may be changed after startup.
if (!shared->stop_servers_callback)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set.");
shared->stop_servers_callback(server_type);
}
void Context::shutdown()
{

View File

@ -134,6 +134,7 @@ using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
class StoragePolicySelector;
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
class ServerType;
template <class Queue>
class MergeTreeBackgroundExecutor;
@ -1057,6 +1058,13 @@ public:
void setConfigReloadCallback(ConfigReloadCallback && callback);
void reloadConfig() const;
using StartStopServersCallback = std::function<void(const ServerType &)>;
void setStartServersCallback(StartStopServersCallback && callback);
void setStopServersCallback(StartStopServersCallback && callback);
void startServers(const ServerType & server_type) const;
void stopServers(const ServerType & server_type) const;
void shutdown();
bool isInternalQuery() const { return is_internal_query; }

View File

@ -349,6 +349,15 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
DatabasePtr database;
{
// Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part.
// So, fail early and gracefully...
if (!table_id.hasDatabase())
{
if (exception)
exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name"));
return {};
}
std::lock_guard lock{databases_mutex};
auto it = databases.find(table_id.getDatabaseName());
if (databases.end() == it)

View File

@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
std::vector<std::pair<String, bool>> tables_to_drop;
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
iterator->table()->flush();
iterator->table()->flushAndPrepareForShutdown();
tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()});
}

View File

@ -556,6 +556,14 @@ BlockIO InterpreterSystemQuery::execute()
);
break;
}
case Type::STOP_LISTEN:
getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
getContext()->stopServers(query.server_type);
break;
case Type::START_LISTEN:
getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
getContext()->startServers(query.server_type);
break;
case Type::FLUSH_ASYNC_INSERT_QUEUE:
{
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
@ -567,9 +575,6 @@ BlockIO InterpreterSystemQuery::execute()
queue->flushAll();
break;
}
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type);
case Type::STOP_THREAD_FUZZER:
getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER);
ThreadFuzzer::stop();
@ -1181,8 +1186,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE);
break;
}
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:
case Type::STOP_LISTEN:
case Type::START_LISTEN:
{
required_access.emplace_back(AccessType::SYSTEM_LISTEN);
break;
}
case Type::STOP_THREAD_FUZZER:
case Type::START_THREAD_FUZZER:
case Type::ENABLE_FAILPOINT:

View File

@ -220,6 +220,17 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
{
settings.ostr << (settings.hilite ? hilite_none : "");
}
else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type)
<< (settings.hilite ? hilite_none : "");
if (server_type.type == ServerType::CUSTOM)
{
settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name);
}
}
}

View File

@ -3,6 +3,7 @@
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/IAST.h>
#include <Parsers/SyncReplicaMode.h>
#include <Server/ServerType.h>
#include "config.h"
@ -35,8 +36,8 @@ public:
#if USE_AWS_S3
DROP_S3_CLIENT_CACHE,
#endif
STOP_LISTEN_QUERIES,
START_LISTEN_QUERIES,
STOP_LISTEN,
START_LISTEN,
RESTART_REPLICAS,
RESTART_REPLICA,
RESTORE_REPLICA,
@ -116,6 +117,8 @@ public:
SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT;
ServerType server_type;
String getID(char) const override { return "SYSTEM query"; }
ASTPtr clone() const override

View File

@ -442,6 +442,42 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
break;
}
case Type::START_LISTEN:
case Type::STOP_LISTEN:
{
if (!parseQueryWithOnCluster(res, pos, expected))
return false;
ServerType::Type current_type = ServerType::Type::END;
std::string current_custom_name;
for (const auto & type : magic_enum::enum_values<ServerType::Type>())
{
if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected))
{
current_type = type;
break;
}
}
if (current_type == ServerType::Type::END)
return false;
if (current_type == ServerType::CUSTOM)
{
ASTPtr ast;
if (!ParserStringLiteral{}.parse(pos, ast, expected))
return false;
current_custom_name = ast->as<ASTLiteral &>().value.get<const String &>();
}
res->server_type = ServerType(current_type, current_custom_name);
break;
}
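/// For illustration, this branch parses statements such as (keywords are the ServerType names with '_' replaced by ' '):
///     SYSTEM STOP LISTEN QUERIES ALL
///     SYSTEM START LISTEN HTTP
///     SYSTEM STOP LISTEN CUSTOM 'my_protocol'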
default:
{
if (!parseQueryWithOnCluster(res, pos, expected))

View File

@ -3,8 +3,8 @@ set(SRCS)
clickhouse_add_executable(lexer lexer.cpp ${SRCS})
target_link_libraries(lexer PRIVATE clickhouse_parsers)
clickhouse_add_executable(select_parser select_parser.cpp ${SRCS})
clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp")
target_link_libraries(select_parser PRIVATE clickhouse_parsers)
clickhouse_add_executable(create_parser create_parser.cpp ${SRCS})
clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp")
target_link_libraries(create_parser PRIVATE clickhouse_parsers)

View File

@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_
if (read_progress->counters.total_rows_approx)
read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx);
if (read_progress->counters.total_bytes)
read_progress_callback->addTotalBytes(read_progress->counters.total_bytes);
if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits))
node->processor->cancel();
}

View File

@ -85,7 +85,7 @@ private:
size_t num_errors = 0;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
};
}

View File

@ -50,7 +50,7 @@ private:
int record_batch_current = 0;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
const FormatSettings format_settings;

View File

@ -67,7 +67,7 @@ protected:
Serializations serializations;
std::unique_ptr<JSONColumnsReaderBase> reader;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
};

View File

@ -66,7 +66,7 @@ private:
std::unique_ptr<NativeReader> reader;
Block header;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
};
class NativeOutputFormat final : public IOutputFormat

View File

@ -52,7 +52,7 @@ private:
std::vector<int> include_indices;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
const FormatSettings format_settings;
const std::unordered_set<int> & skip_stripes;

View File

@ -202,7 +202,7 @@ private:
const size_t max_block_size;
BlockMissingValues last_block_missing_values;
size_t last_approx_bytes_read_for_chunk;
size_t last_approx_bytes_read_for_chunk = 0;
/// Non-atomic because it is used in one thread.
std::optional<size_t> next_block_in_current_unit;

View File

@ -273,7 +273,7 @@ private:
std::unique_ptr<ThreadPool> pool;
BlockMissingValues previous_block_missing_values;
size_t previous_approx_bytes_read_for_chunk;
size_t previous_approx_bytes_read_for_chunk = 0;
std::exception_ptr background_exception = nullptr;
std::atomic<int> is_stopped{0};

View File

@ -96,7 +96,7 @@ private:
Serializations serializations;
BlockMissingValues block_missing_values;
size_t approx_bytes_read_for_chunk;
size_t approx_bytes_read_for_chunk = 0;
};
class ValuesSchemaReader : public IRowSchemaReader

View File

@ -343,6 +343,7 @@ public:
uint64_t read_rows = 0;
uint64_t read_bytes = 0;
uint64_t total_rows_approx = 0;
uint64_t total_bytes = 0;
};
struct ReadProgress

View File

@ -43,6 +43,7 @@ public:
std::optional<ReadProgress> getReadProgress() final;
void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; }
void addTotalBytes(size_t value) { read_progress.total_bytes += value; }
};
using SourcePtr = std::shared_ptr<ISource>;

View File

@ -54,7 +54,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
bool JoinStep::allowPushDownToRight() const
{
return join->pipelineType() == JoinPipelineType::YShaped;
return join->pipelineType() == JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst;
}
void JoinStep::describePipeline(FormatSettings & settings) const

View File

@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind)
return 0;
/// There is no ASOF Right join, so we're talking about pushing to the right side
if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof)
return 0;
bool is_left = kind == JoinKind::Left;
const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header;
const auto & res_header = child->getOutputStream().header;

View File

@ -137,6 +137,69 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts)
return true;
}
/// build sort description for output stream
static void updateSortDescriptionForOutputStream(
DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info)
{
/// Updating sort description can be done after PREWHERE actions are applied to the header.
/// After PREWHERE actions are applied, column names in the header can differ from storage column names due to aliases.
/// To mitigate this, we try to rebuild the original header and use it to deduce the sorting description.
/// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization
Block original_header = output_stream.header.cloneEmpty();
if (prewhere_info)
{
if (prewhere_info->prewhere_actions)
{
FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions);
for (auto & column : original_header)
{
const auto * original_node = original_column_finder.find(column.name);
if (original_node)
column.name = original_node->result_name;
}
}
if (prewhere_info->row_level_filter)
{
FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter);
for (auto & column : original_header)
{
const auto * original_node = original_column_finder.find(column.name);
if (original_node)
column.name = original_node->result_name;
}
}
}
SortDescription sort_description;
const Block & header = output_stream.header;
for (const auto & sorting_key : sorting_key_columns)
{
const auto it = std::find_if(
original_header.begin(), original_header.end(), [&sorting_key](const auto & column) { return column.name == sorting_key; });
if (it == original_header.end())
break;
const size_t column_pos = std::distance(original_header.begin(), it);
sort_description.emplace_back((header.begin() + column_pos)->name, sort_direction);
}
if (!sort_description.empty())
{
if (input_order_info)
{
output_stream.sort_scope = DataStream::SortScope::Stream;
const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size;
if (sort_description.size() > used_prefix_of_sorting_key_size)
sort_description.resize(used_prefix_of_sorting_key_size);
}
else
output_stream.sort_scope = DataStream::SortScope::Chunk;
}
output_stream.sort_description = std::move(sort_description);
}
void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const
{
@ -250,33 +313,12 @@ ReadFromMergeTree::ReadFromMergeTree(
/// Add explicit description.
setStepDescription(data.getStorageID().getFullNameNotQuoted());
{ /// build sort description for output stream
SortDescription sort_description;
const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns();
const Block & header = output_stream->header;
const int sort_direction = getSortDirection();
for (const auto & column_name : sorting_key_columns)
{
if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; })
== header.end())
break;
sort_description.emplace_back(column_name, sort_direction);
}
if (!sort_description.empty())
{
if (query_info.getInputOrderInfo())
{
output_stream->sort_scope = DataStream::SortScope::Stream;
const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size;
if (sort_description.size() > used_prefix_of_sorting_key_size)
sort_description.resize(used_prefix_of_sorting_key_size);
}
else
output_stream->sort_scope = DataStream::SortScope::Chunk;
}
output_stream->sort_description = std::move(sort_description);
}
updateSortDescriptionForOutputStream(
*output_stream,
storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(),
getSortDirection(),
query_info.getInputOrderInfo(),
prewhere_info);
}
@ -1564,6 +1606,12 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info
prewhere_info_value,
data.getPartitionValueType(),
virt_column_names)};
updateSortDescriptionForOutputStream(
*output_stream,
storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(),
getSortDirection(),
query_info.getInputOrderInfo(),
prewhere_info);
}
bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort()

View File

@ -77,6 +77,8 @@ std::optional<Chunk> RemoteSource::tryGenerate()
{
if (value.total_rows_to_read)
addTotalRowsApprox(value.total_rows_to_read);
if (value.total_bytes_to_read)
addTotalBytes(value.total_bytes_to_read);
progress(value.read_rows, value.read_bytes);
});

View File

@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c
process_list_elem->updateProgressIn(total_rows_progress);
}
size_t bytes = 0;
if ((bytes = total_bytes.exchange(0)) != 0)
{
Progress total_bytes_progress = {0, 0, 0, bytes};
if (progress_callback)
progress_callback(total_bytes_progress);
if (process_list_elem)
process_list_elem->updateProgressIn(total_bytes_progress);
}
Progress value {read_rows, read_bytes};
if (progress_callback)

View File

@ -23,6 +23,7 @@ public:
void setProcessListElement(QueryStatusPtr elem);
void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; }
void addTotalRowsApprox(size_t value) { total_rows_approx += value; }
void addTotalBytes(size_t value) { total_bytes += value; }
/// Skip updating profile events.
/// For merges in mutations it may need special logic, it's done inside ProgressCallback.
@ -37,6 +38,8 @@ private:
/// The approximate total number of rows to read. For progress bar.
std::atomic_size_t total_rows_approx = 0;
/// The total number of bytes to read. For progress bar.
std::atomic_size_t total_bytes = 0;
std::mutex limits_and_quotas_mutex;
Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time

View File

@ -591,8 +591,8 @@ void RemoteQueryExecutor::finish()
/// Send the request to abort the execution of the request, if not already sent.
tryCancel("Cancelling query because enough data has been read");
/// If connections weren't created yet or query wasn't sent, nothing to do.
if (!connections || !sent_query)
/// If connections weren't created yet, query wasn't sent or was already finished, nothing to do.
if (!connections || !sent_query || finished)
return;
/// Get the remaining packets so that there is no out of sync in the connections to the replicas.

src/Server/ServerType.cpp Normal file
View File

@ -0,0 +1,138 @@
#include <Server/ServerType.h>
#include <vector>
#include <algorithm>
#include <base/types.h>
#include <magic_enum.hpp>
namespace DB
{
namespace
{
std::vector<std::string> getTypeIndexToTypeName()
{
constexpr std::size_t types_size = magic_enum::enum_count<ServerType::Type>();
std::vector<std::string> type_index_to_type_name;
type_index_to_type_name.resize(types_size);
auto entries = magic_enum::enum_entries<ServerType::Type>();
for (const auto & [entry, str] : entries)
{
auto str_copy = String(str);
std::replace(str_copy.begin(), str_copy.end(), '_', ' ');
type_index_to_type_name[static_cast<UInt64>(entry)] = std::move(str_copy);
}
return type_index_to_type_name;
}
}
const char * ServerType::serverTypeToString(ServerType::Type type)
{
/** During parsing, if a SYSTEM query is not parsed properly, this description is added to the Expected variants (see IParser.h).
* The description string must be statically allocated.
*/
static std::vector<std::string> type_index_to_type_name = getTypeIndexToTypeName();
const auto & type_name = type_index_to_type_name[static_cast<UInt64>(type)];
return type_name.data();
}
bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const
{
if (type == Type::QUERIES_ALL)
return true;
if (type == Type::QUERIES_DEFAULT)
{
switch (server_type)
{
case Type::TCP:
case Type::TCP_WITH_PROXY:
case Type::TCP_SECURE:
case Type::HTTP:
case Type::HTTPS:
case Type::MYSQL:
case Type::GRPC:
case Type::POSTGRESQL:
case Type::PROMETHEUS:
case Type::INTERSERVER_HTTP:
case Type::INTERSERVER_HTTPS:
return true;
default:
return false;
}
}
if (type == Type::QUERIES_CUSTOM)
{
switch (server_type)
{
case Type::CUSTOM:
return true;
default:
return false;
}
}
return type == server_type && custom_name == custom_name_;
}
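/// A few illustrative cases: ServerType(Type::QUERIES_ALL).shouldStart(Type::GRPC) is true,
/// ServerType(Type::QUERIES_DEFAULT).shouldStart(Type::CUSTOM, "x") is false (custom protocols are not in the default set),
/// and ServerType(Type::TCP).shouldStart(Type::TCP) is true.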
bool ServerType::shouldStop(const std::string & port_name) const
{
Type port_type;
std::string port_custom_name;
if (port_name == "http_port")
port_type = Type::HTTP;
else if (port_name == "https_port")
port_type = Type::HTTPS;
else if (port_name == "tcp_port")
port_type = Type::TCP;
else if (port_name == "tcp_with_proxy_port")
port_type = Type::TCP_WITH_PROXY;
else if (port_name == "tcp_port_secure")
port_type = Type::TCP_SECURE;
else if (port_name == "mysql_port")
port_type = Type::MYSQL;
else if (port_name == "postgresql_port")
port_type = Type::POSTGRESQL;
else if (port_name == "grpc_port")
port_type = Type::GRPC;
else if (port_name == "prometheus.port")
port_type = Type::PROMETHEUS;
else if (port_name == "interserver_http_port")
port_type = Type::INTERSERVER_HTTP;
else if (port_name == "interserver_https_port")
port_type = Type::INTERSERVER_HTTPS;
else if (port_name.starts_with("protocols.") && port_name.ends_with(".port"))
{
constexpr size_t protocols_size = std::string_view("protocols.").size();
constexpr size_t port_size = std::string_view(".port").size();
port_type = Type::CUSTOM;
port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - port_size);
}
else
port_type = Type::UNKNOWN;
if (port_type == Type::UNKNOWN)
return false;
return shouldStart(port_type, port_custom_name);
}
}

src/Server/ServerType.h Normal file
View File

@ -0,0 +1,44 @@
#pragma once
#include <base/types.h>
namespace DB
{
class ServerType
{
public:
enum Type
{
UNKNOWN,
TCP,
TCP_WITH_PROXY,
TCP_SECURE,
HTTP,
HTTPS,
MYSQL,
GRPC,
POSTGRESQL,
PROMETHEUS,
CUSTOM,
INTERSERVER_HTTP,
INTERSERVER_HTTPS,
QUERIES_ALL,
QUERIES_DEFAULT,
QUERIES_CUSTOM,
END
};
ServerType() = default;
explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {}
static const char * serverTypeToString(Type type);
bool shouldStart(Type server_type, const std::string & custom_name_ = "") const;
bool shouldStop(const std::string & port_name) const;
Type type;
std::string custom_name;
};
}

View File

@ -57,7 +57,23 @@ public:
~HDFSBuilderWrapper() { hdfsFreeBuilder(hdfs_builder); }
HDFSBuilderWrapper(const HDFSBuilderWrapper &) = delete;
HDFSBuilderWrapper(HDFSBuilderWrapper &&) = default;
HDFSBuilderWrapper & operator=(const HDFSBuilderWrapper &) = delete;
HDFSBuilderWrapper(HDFSBuilderWrapper && other) noexcept
{
*this = std::move(other);
}
HDFSBuilderWrapper & operator=(HDFSBuilderWrapper && other) noexcept
{
std::swap(hdfs_builder, other.hdfs_builder);
config_stor = std::move(other.config_stor);
hadoop_kerberos_keytab = std::move(other.hadoop_kerberos_keytab);
hadoop_kerberos_principal = std::move(other.hadoop_kerberos_principal);
hadoop_security_kerberos_ticket_cache_path = std::move(other.hadoop_security_kerberos_ticket_cache_path);
need_kinit = std::move(other.need_kinit);
return *this;
}
hdfsBuilder * get() { return hdfs_builder; }

View File

@ -3,6 +3,7 @@
#if USE_HDFS
#include <Storages/HDFS/HDFSCommon.h>
#include <IO/ResourceGuard.h>
#include <IO/Progress.h>
#include <Common/Throttler.h>
#include <Common/safe_cast.h>
#include <hdfs/hdfs.h>
@ -42,19 +43,23 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
off_t file_offset = 0;
off_t read_until_position = 0;
std::optional<size_t> file_size;
explicit ReadBufferFromHDFSImpl(
const std::string & hdfs_uri_,
const std::string & hdfs_file_path_,
const Poco::Util::AbstractConfiguration & config_,
const ReadSettings & read_settings_,
size_t read_until_position_,
bool use_external_buffer_)
bool use_external_buffer_,
std::optional<size_t> file_size_)
: BufferWithOwnMemory<SeekableReadBuffer>(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size)
, hdfs_uri(hdfs_uri_)
, hdfs_file_path(hdfs_file_path_)
, builder(createHDFSBuilder(hdfs_uri_, config_))
, read_settings(read_settings_)
, read_until_position(read_until_position_)
, file_size(file_size_)
{
fs = createHDFSFS(builder.get());
fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
@ -70,12 +75,16 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
hdfsCloseFile(fs.get(), fin);
}
size_t getFileSize() const
size_t getFileSize()
{
if (file_size)
return *file_size;
auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str());
if (!file_info)
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", hdfs_file_path);
return file_info->mSize;
file_size = static_cast<size_t>(file_info->mSize);
return *file_size;
}
bool nextImpl() override
@ -156,10 +165,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS(
const Poco::Util::AbstractConfiguration & config_,
const ReadSettings & read_settings_,
size_t read_until_position_,
bool use_external_buffer_)
bool use_external_buffer_,
std::optional<size_t> file_size_)
: ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0)
, impl(std::make_unique<ReadBufferFromHDFSImpl>(
hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_))
hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_))
, use_external_buffer(use_external_buffer_)
{
}

View File

@ -29,7 +29,8 @@ public:
const Poco::Util::AbstractConfiguration & config_,
const ReadSettings & read_settings_,
size_t read_until_position_ = 0,
bool use_external_buffer = false);
bool use_external_buffer = false,
std::optional<size_t> file_size = std::nullopt);
~ReadBufferFromHDFS() override;

View File

@ -30,7 +30,6 @@
#include <Storages/PartitionedSink.h>
#include <Storages/getVirtualsForStorage.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/ReadFromStorageProgress.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/FormatFactory.h>
@ -367,8 +366,13 @@ public:
{
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
uris = getPathsList(path_from_uri, uri_without_path, context_);
auto file_progress_callback = context_->getFileProgressCallback();
for (auto & elem : uris)
{
elem.path = uri_without_path + elem.path;
if (file_progress_callback && elem.info)
file_progress_callback(FileProgress(0, elem.info->size));
}
uris_iter = uris.begin();
}
@ -389,37 +393,54 @@ private:
std::vector<StorageHDFS::PathWithInfo>::iterator uris_iter;
};
class HDFSSource::URISIterator::Impl
class HDFSSource::URISIterator::Impl : WithContext
{
public:
explicit Impl(const std::vector<String> & uris_, ContextPtr context)
explicit Impl(const std::vector<String> & uris_, ContextPtr context_)
: WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback())
{
auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]);
HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef());
auto fs = createHDFSFS(builder.get());
for (const auto & uri : uris_)
if (!uris.empty())
{
path_and_uri = getPathFromUriAndUriWithoutPath(uri);
if (!hdfsExists(fs.get(), path_and_uri.first.c_str()))
uris.push_back(uri);
auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]);
builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef());
fs = createHDFSFS(builder.get());
}
uris_iter = uris.begin();
}
StorageHDFS::PathWithInfo next()
{
std::lock_guard lock(mutex);
if (uris_iter == uris.end())
return {"", {}};
auto key = *uris_iter;
++uris_iter;
return {key, {}};
String uri;
hdfsFileInfo * hdfs_info;
do
{
size_t current_index = index.fetch_add(1);
if (current_index >= uris.size())
return {"", {}};
uri = uris[current_index];
auto path_and_uri = getPathFromUriAndUriWithoutPath(uri);
hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str());
}
/// Skip non-existent files.
while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos);
std::optional<StorageHDFS::PathInfo> info;
if (hdfs_info)
{
info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast<size_t>(hdfs_info->mSize)};
if (file_progress_callback)
file_progress_callback(FileProgress(0, hdfs_info->mSize));
}
return {uri, info};
}
private:
std::mutex mutex;
std::atomic_size_t index = 0;
Strings uris;
Strings::iterator uris_iter;
HDFSBuilderWrapper builder;
HDFSFSPtr fs;
std::function<void(FileProgress)> file_progress_callback;
};
HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri)
@ -456,7 +477,7 @@ HDFSSource::HDFSSource(
UInt64 max_block_size_,
std::shared_ptr<IteratorWrapper> file_iterator_,
ColumnsDescription columns_description_)
: ISource(getHeader(block_for_format_, requested_virtual_columns_))
: ISource(getHeader(block_for_format_, requested_virtual_columns_), false)
, WithContext(context_)
, storage(std::move(storage_))
, block_for_format(block_for_format_)
@ -482,13 +503,17 @@ bool HDFSSource::initialize()
continue;
current_path = path_with_info.path;
std::optional<size_t> file_size;
if (path_with_info.info)
file_size = path_with_info.info->size;
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path);
auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
auto impl = std::make_unique<ReadBufferFromHDFS>(
uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size);
if (!skip_empty_files || !impl->eof())
{
impl->setProgressCallback(getContext());
const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max;
read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
break;
@ -497,14 +522,6 @@ bool HDFSSource::initialize()
current_path = path_with_info.path;
if (path_with_info.info && path_with_info.info->size)
{
/// Adjust total_rows_approx_accumulated with new total size.
if (total_files_size)
total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated));
total_files_size += path_with_info.info->size;
}
input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size);
QueryPipelineBuilder builder;
@ -542,14 +559,8 @@ Chunk HDFSSource::generate()
{
Columns columns = chunk.getColumns();
UInt64 num_rows = chunk.getNumRows();
if (num_rows && total_files_size)
{
size_t chunk_size = input_format->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
size_t chunk_size = input_format->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
for (const auto & virtual_column : requested_virtual_columns)
{

View File

@ -169,11 +169,6 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader;
String current_path;
UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
size_t total_files_size = 0;
/// Recreate ReadBuffer and PullingPipelineExecutor for each file.
bool initialize();
};

View File

@ -553,15 +553,15 @@ public:
/**
* If the storage requires some complicated work on destroying,
* then you have two virtual methods:
* - flush()
* - flushAndPrepareForShutdown()
* - shutdown()
*
* @see shutdown()
* @see flush()
* @see flushAndPrepareForShutdown()
*/
void flushAndShutdown()
{
flush();
flushAndPrepareForShutdown();
shutdown();
}
@ -574,7 +574,7 @@ public:
/// Called before shutdown() to flush data to underlying storage
/// Data in memory needs to be persisted.
virtual void flush() {}
virtual void flushAndPrepareForShutdown() {}
/// Asks table to stop executing some action identified by action_type
/// If table does not support such type of lock, and empty lock is returned

View File

@ -203,6 +203,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
sendPartFromMemory(part, out, send_projections);
else
sendPartFromDisk(part, out, client_protocol_version, false, send_projections);
data.addLastSentPart(part->info);
}
catch (const NetException &)
{

View File

@ -5693,6 +5693,10 @@ bool MergeTreeData::supportsLightweightDelete() const
auto lock = lockParts();
for (const auto & part : data_parts_by_info)
{
if (part->getState() == MergeTreeDataPartState::Outdated
|| part->getState() == MergeTreeDataPartState::Deleting)
continue;
if (!part->supportLightweightDeleteMutate())
return false;
}

View File

@ -328,7 +328,10 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf
for (const auto & range : part.ranges)
part_info->sum_marks += range.end - range.begin;
part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names);
const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info
? prewhere_info->prewhere_actions->getRequiredColumnsNames()
: column_names;
part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, columns);
const auto task_columns = getReadTaskColumns(
part_reader_info,
@ -369,9 +372,9 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf
}
if (prewhere_info)
{
for (const auto & columns : task_columns.pre_columns)
for (const auto & cols : task_columns.pre_columns)
{
for (const auto & col : columns)
for (const auto & col : cols)
{
const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed;
part_info->estimated_memory_usage_for_single_prefetch += std::min<size_t>(col_size, settings.prefetch_buffer_size);

View File

@ -73,8 +73,10 @@ MergeTreeReadPool::MergeTreeReadPool(
size_t total_marks = 0;
for (const auto & part : parts_ranges)
{
total_compressed_bytes += getApproxSizeOfPart(
*part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_);
const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info
? prewhere_info->prewhere_actions->getRequiredColumnsNames()
: column_names_;
total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns);
total_marks += part.getMarksCount();
}

View File

@ -119,6 +119,7 @@ struct Settings;
M(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-identical parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown, the table will wait for the specified amount of time for unique parts (which exist only on the current replica) to be fetched by other replicas (0 means disabled).", 0) \
\
/** Check delay of replicas settings. */ \
M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \

View File

@ -576,7 +576,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper
/// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired).
/// And it's ok if replica became readonly after shutdown.
/// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread.
bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called;
bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called;
if (not_completely_initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug",
reason, storage.getStorageID().getNameForLogs());

View File

@ -329,7 +329,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica()
void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown)
{
setReadonly(part_of_full_shutdown);
setReadonly(/* on_shutdown = */ part_of_full_shutdown);
storage.partialShutdown();
}
@ -339,10 +339,15 @@ void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown)
/// Stop restarting_thread before stopping other tasks - so that it won't restart them again.
need_stop = true;
task->deactivate();
/// Explicitly set the event, because the restarting thread will not set it again
if (part_of_full_shutdown)
storage.startup_event.set();
LOG_TRACE(log, "Restarting thread finished");
/// Stop other tasks.
partialShutdown(part_of_full_shutdown);
setReadonly(part_of_full_shutdown);
}
void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown)

View File

@ -5,6 +5,7 @@
#include <base/types.h>
#include <thread>
#include <atomic>
#include <Common/logger_useful.h>
namespace DB
@ -25,6 +26,7 @@ public:
void start(bool schedule = true)
{
LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule);
if (schedule)
task->activateAndSchedule();
else
@ -36,6 +38,7 @@ public:
void shutdown(bool part_of_full_shutdown);
void run();
private:
StorageReplicatedMergeTree & storage;
String log_name;

View File

@ -1,52 +0,0 @@
#include <Storages/ReadFromStorageProgress.h>
#include <Processors/ISource.h>
#include <QueryPipeline/StreamLocalLimits.h>
namespace DB
{
void updateRowsProgressApprox(
ISource & source,
size_t num_rows,
UInt64 chunk_bytes_size,
UInt64 total_result_size,
UInt64 & total_rows_approx_accumulated,
size_t & total_rows_count_times,
UInt64 & total_rows_approx_max)
{
if (!total_result_size)
return;
if (!num_rows)
return;
const auto progress = source.getReadProgress();
if (progress && !progress->limits.empty())
{
for (const auto & limit : progress->limits)
{
if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes
|| limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes)
return;
}
}
const auto bytes_per_row = std::ceil(static_cast<double>(chunk_bytes_size) / num_rows);
size_t total_rows_approx = static_cast<size_t>(std::ceil(static_cast<double>(total_result_size) / bytes_per_row));
total_rows_approx_accumulated += total_rows_approx;
++total_rows_count_times;
total_rows_approx = total_rows_approx_accumulated / total_rows_count_times;
/// We need to add diff, because total_rows_approx is incremental value.
/// It would be more correct to send total_rows_approx as is (not a diff),
/// but incrementation of total_rows_to_read does not allow that.
/// A new counter can be introduced for that to be sent to client, but it does not worth it.
if (total_rows_approx > total_rows_approx_max)
{
size_t diff = total_rows_approx - total_rows_approx_max;
source.addTotalRowsApprox(diff);
total_rows_approx_max = total_rows_approx;
}
}
}
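The file removed above implemented the old progress heuristic: derive a bytes-per-row ratio from each chunk, extrapolate to the total input size, keep a running average of the estimates, and report only the increase over what was already reported. A self-contained sketch of that arithmetic, using plain integers instead of the ISource interface (names are illustrative only):

#include <cmath>
#include <cstdint>
#include <iostream>

struct RowsApproxState
{
    uint64_t accumulated = 0;   // sum of all estimates so far
    size_t   count = 0;         // number of estimates made
    uint64_t reported_max = 0;  // largest value already reported
};

// Returns how many additional approximate rows should be reported.
uint64_t estimateRowsDiff(RowsApproxState & state, size_t num_rows, uint64_t chunk_bytes, uint64_t total_bytes)
{
    if (!total_bytes || !num_rows)
        return 0;

    const double bytes_per_row = std::ceil(static_cast<double>(chunk_bytes) / num_rows);
    uint64_t estimate = static_cast<uint64_t>(std::ceil(static_cast<double>(total_bytes) / bytes_per_row));

    state.accumulated += estimate;
    ++state.count;
    estimate = state.accumulated / state.count;  // running average of estimates

    if (estimate <= state.reported_max)
        return 0;

    const uint64_t diff = estimate - state.reported_max;
    state.reported_max = estimate;
    return diff;
}

int main()
{
    RowsApproxState state;
    // Two chunks of a hypothetical 1 MiB file.
    std::cout << estimateRowsDiff(state, 100, 10'000, 1'048'576) << '\n';
    std::cout << estimateRowsDiff(state, 120, 10'000, 1'048'576) << '\n';
    return 0;
}

The rest of this diff drops this heuristic in favour of exact per-file sizes delivered through FileProgress callbacks and per-chunk progress() calls in the storage sources below.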

View File

@ -1,18 +0,0 @@
#pragma once
#include <Core/Types.h>
namespace DB
{
class ISource;
void updateRowsProgressApprox(
ISource & source,
size_t num_rows,
UInt64 chunk_bytes_size,
UInt64 total_result_size,
UInt64 & total_rows_approx_accumulated,
size_t & total_rows_count_times,
UInt64 & total_rows_approx_max);
}

View File

@ -31,7 +31,6 @@
#include <Storages/getVirtualsForStorage.h>
#include <Storages/StorageURL.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ReadFromStorageProgress.h>
#include <Common/parseGlobs.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h>
#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
@ -631,13 +630,13 @@ Pipe StorageAzureBlob::read(
/// Iterate through disclosed globs and make a source for each file
iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
object_storage.get(), configuration.container, configuration.blob_path,
query_info.query, virtual_block, local_context, nullptr);
query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback());
}
else
{
iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
object_storage.get(), configuration.container, configuration.blobs_paths,
query_info.query, virtual_block, local_context, nullptr);
query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback());
}
ColumnsDescription columns_description;
@ -807,7 +806,8 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
RelativePathsWithMetadata * outer_blobs_)
RelativePathsWithMetadata * outer_blobs_,
std::function<void(FileProgress)> file_progress_callback_)
: IIterator(context_)
, object_storage(object_storage_)
, container(container_)
@ -815,6 +815,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
, query(query_)
, virtual_header(virtual_header_)
, outer_blobs(outer_blobs_)
, file_progress_callback(file_progress_callback_)
{
const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
@ -893,7 +894,8 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
blobs_with_metadata.clear();
for (UInt64 idx : idxs.getData())
{
total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed);
if (file_progress_callback)
file_progress_callback(FileProgress(0, new_batch[idx].metadata.size_bytes));
blobs_with_metadata.emplace_back(std::move(new_batch[idx]));
if (outer_blobs)
outer_blobs->emplace_back(blobs_with_metadata.back());
@ -905,8 +907,11 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end());
blobs_with_metadata = std::move(new_batch);
for (const auto & [_, info] : blobs_with_metadata)
total_size.fetch_add(info.size_bytes, std::memory_order_relaxed);
if (file_progress_callback)
{
for (const auto & [_, info] : blobs_with_metadata)
file_progress_callback(FileProgress(0, info.size_bytes));
}
}
}
@ -916,11 +921,6 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
return blobs_with_metadata[current_index];
}
size_t StorageAzureBlobSource::GlobIterator::getTotalSize() const
{
return total_size.load(std::memory_order_relaxed);
}
void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_key)
{
@ -940,17 +940,17 @@ void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_ke
StorageAzureBlobSource::KeysIterator::KeysIterator(
AzureObjectStorage * object_storage_,
const std::string & container_,
Strings keys_,
const Strings & keys_,
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
RelativePathsWithMetadata * outer_blobs_)
RelativePathsWithMetadata * outer_blobs,
std::function<void(FileProgress)> file_progress_callback)
: IIterator(context_)
, object_storage(object_storage_)
, container(container_)
, query(query_)
, virtual_header(virtual_header_)
, outer_blobs(outer_blobs_)
{
Strings all_keys = keys_;
@ -986,7 +986,8 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
for (auto && key : all_keys)
{
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
total_size += object_metadata.size_bytes;
if (file_progress_callback)
file_progress_callback(FileProgress(0, object_metadata.size_bytes));
keys.emplace_back(RelativePathWithMetadata{key, object_metadata});
}
@ -1003,12 +1004,6 @@ RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next()
return keys[current_index];
}
size_t StorageAzureBlobSource::KeysIterator::getTotalSize() const
{
return total_size.load(std::memory_order_relaxed);
}
Chunk StorageAzureBlobSource::generate()
{
while (true)
@ -1024,17 +1019,10 @@ Chunk StorageAzureBlobSource::generate()
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
const auto & file_path = reader.getPath();
if (num_rows && total_objects_size)
{
size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(
*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
for (const auto & virtual_column : requested_virtual_columns)
{
if (virtual_column.name == "_path")
@ -1059,13 +1047,6 @@ Chunk StorageAzureBlobSource::generate()
if (!reader)
break;
size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
/// Adjust total_rows_approx_accumulated with new total size.
if (total_objects_size)
total_rows_approx_accumulated = static_cast<size_t>(
std::ceil(static_cast<double>(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated));
total_objects_size += object_size;
/// Even if task is finished the thread may be not freed in pool.
/// So wait until it will be freed before scheduling a new task.
create_reader_pool.wait();
@ -1096,7 +1077,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
AzureObjectStorage * object_storage_,
const String & container_,
std::shared_ptr<IIterator> file_iterator_)
:ISource(getHeader(sample_block_, requested_virtual_columns_))
:ISource(getHeader(sample_block_, requested_virtual_columns_), false)
, WithContext(context_)
, requested_virtual_columns(requested_virtual_columns_)
, format(format_)
@ -1114,13 +1095,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
{
reader = createReader();
if (reader)
{
const auto & read_buf = reader.getReadBuffer();
if (read_buf)
total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
reader_future = createReaderAsync();
}
}
@ -1162,7 +1137,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader()
auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto current_reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)};
return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)};
}
std::future<StorageAzureBlobSource::ReaderHolder> StorageAzureBlobSource::createReaderAsync()
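Both iterators above now push each blob's size into a file progress callback instead of accumulating a total_size member. A rough stand-alone sketch of that callback contract, assuming a simplified FileProgress with the same two fields (bytes just read, total bytes discovered); the aggregation shown here is illustrative, not the actual context-level implementation:

#include <cstdint>
#include <functional>
#include <iostream>

// Hypothetical stand-in for DB::FileProgress.
struct FileProgress
{
    uint64_t read_bytes;
    uint64_t total_bytes_to_read;
};

int main()
{
    uint64_t total_known = 0;
    uint64_t total_read = 0;

    // The callback the iterators receive; here it only aggregates and prints.
    std::function<void(FileProgress)> file_progress_callback = [&](FileProgress p)
    {
        total_known += p.total_bytes_to_read;
        total_read += p.read_bytes;
        std::cout << "read " << total_read << " of " << total_known << " bytes\n";
    };

    // Iterators announce each file's size up front with zero bytes read...
    file_progress_callback(FileProgress{0, 4096});
    file_progress_callback(FileProgress{0, 8192});
    // ...and the sources later report actual reads chunk by chunk.
    file_progress_callback(FileProgress{1024, 0});
    return 0;
}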

View File

@ -148,7 +148,6 @@ public:
IIterator(ContextPtr context_):WithContext(context_) {}
virtual ~IIterator() = default;
virtual RelativePathWithMetadata next() = 0;
virtual size_t getTotalSize() const = 0;
RelativePathWithMetadata operator ()() { return next(); }
};
@ -163,10 +162,10 @@ public:
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
RelativePathsWithMetadata * outer_blobs_);
RelativePathsWithMetadata * outer_blobs_,
std::function<void(FileProgress)> file_progress_callback_ = {});
RelativePathWithMetadata next() override;
size_t getTotalSize() const override;
~GlobIterator() override = default;
private:
@ -178,7 +177,6 @@ public:
Block virtual_header;
size_t index = 0;
std::atomic<size_t> total_size = 0;
RelativePathsWithMetadata blobs_with_metadata;
RelativePathsWithMetadata * outer_blobs;
@ -191,6 +189,8 @@ public:
bool is_finished = false;
bool is_initialized = false;
std::mutex next_mutex;
std::function<void(FileProgress)> file_progress_callback;
};
class KeysIterator : public IIterator
@ -199,14 +199,14 @@ public:
KeysIterator(
AzureObjectStorage * object_storage_,
const std::string & container_,
Strings keys_,
const Strings & keys_,
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
RelativePathsWithMetadata * outer_blobs_);
RelativePathsWithMetadata * outer_blobs,
std::function<void(FileProgress)> file_progress_callback = {});
RelativePathWithMetadata next() override;
size_t getTotalSize() const override;
~KeysIterator() override = default;
private:
@ -219,9 +219,6 @@ public:
Block virtual_header;
std::atomic<size_t> index = 0;
std::atomic<size_t> total_size = 0;
RelativePathsWithMetadata * outer_blobs;
};
StorageAzureBlobSource(
@ -270,7 +267,7 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader_)
: path(std::move(path_))
, read_buf(std::move(read_buf_))
, input_format(input_format_)
, input_format(std::move(input_format_))
, pipeline(std::move(pipeline_))
, reader(std::move(reader_))
{
@ -301,10 +298,7 @@ private:
PullingPipelineExecutor * operator->() { return reader.get(); }
const PullingPipelineExecutor * operator->() const { return reader.get(); }
const String & getPath() const { return path; }
const std::unique_ptr<ReadBuffer> & getReadBuffer() const { return read_buf; }
const std::shared_ptr<IInputFormat> & getFormat() const { return input_format; }
const IInputFormat * getInputFormat() const { return input_format.get(); }
private:
String path;
@ -322,11 +316,6 @@ private:
ThreadPoolCallbackRunner<ReaderHolder> create_reader_scheduler;
std::future<ReaderHolder> reader_future;
UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
size_t total_objects_size = 0;
/// Recreate ReadBuffer and Pipeline for each file.
ReaderHolder createReader();
std::future<ReaderHolder> createReaderAsync();

View File

@ -682,7 +682,7 @@ void StorageBuffer::startup()
}
void StorageBuffer::flush()
void StorageBuffer::flushAndPrepareForShutdown()
{
if (!flush_handle)
return;

View File

@ -92,7 +92,7 @@ public:
void startup() override;
/// Flush all buffers into the subordinate table and stop background thread.
void flush() override;
void flushAndPrepareForShutdown() override;
bool optimize(
const ASTPtr & query,
const StorageMetadataPtr & metadata_snapshot,

View File

@ -1432,7 +1432,7 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type)
return {};
}
void StorageDistributed::flush()
void StorageDistributed::flushAndPrepareForShutdown()
{
try
{

View File

@ -135,7 +135,7 @@ public:
void initializeFromDisk();
void shutdown() override;
void flush() override;
void flushAndPrepareForShutdown() override;
void drop() override;
bool storesDataOnDisk() const override { return data_volume != nullptr; }

View File

@ -5,7 +5,6 @@
#include <Storages/PartitionedSink.h>
#include <Storages/Distributed/DistributedAsyncInsertSource.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/ReadFromStorageProgress.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
@ -389,14 +388,6 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
std::unique_ptr<ReadBuffer> nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context);
/// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
if (context->getApplicationType() == Context::ApplicationType::LOCAL
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
{
auto & in = static_cast<ReadBufferFromFileBase &>(*nested_buffer);
in.setProgressCallback(context);
}
int zstd_window_log_max = static_cast<int>(context->getSettingsRef().zstd_window_log_max);
return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max);
}
@ -701,7 +692,7 @@ public:
ColumnsDescription columns_description_,
const Block & block_for_format_,
std::unique_ptr<ReadBuffer> read_buf_)
: ISource(getBlockForSource(block_for_format_, files_info_))
: ISource(getBlockForSource(block_for_format_, files_info_), false)
, storage(std::move(storage_))
, storage_snapshot(storage_snapshot_)
, files_info(std::move(files_info_))
@ -816,12 +807,6 @@ public:
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
}
size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0);
/// Adjust total_rows_approx_accumulated with new total size.
if (total_files_size)
total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated));
total_files_size += file_size;
const Settings & settings = context->getSettingsRef();
chassert(!storage->paths.empty());
const auto max_parsing_threads = std::max<size_t>(settings.max_threads/ storage->paths.size(), 1UL);
@ -847,6 +832,10 @@ public:
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
size_t chunk_size = 0;
if (storage->format_name != "Distributed")
chunk_size = input_format->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
/// Enrich with virtual columns.
if (files_info->need_path_column)
@ -864,14 +853,6 @@ public:
chunk.addColumn(column->convertToFullColumnIfConst());
}
if (num_rows && total_files_size)
{
size_t chunk_size = input_format->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(
*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
return chunk;
}
@ -910,12 +891,6 @@ private:
bool finished_generate = false;
std::shared_lock<std::shared_timed_mutex> shared_lock;
UInt64 total_rows_approx_accumulated = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_max = 0;
size_t total_files_size = 0;
};

View File

@ -139,7 +139,7 @@ public:
void startup() override { getNested()->startup(); }
void shutdown() override { getNested()->shutdown(); }
void flush() override { getNested()->flush(); }
void flushAndPrepareForShutdown() override { getNested()->flushAndPrepareForShutdown(); }
ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); }

View File

@ -2,6 +2,7 @@
#include <cstddef>
#include <ranges>
#include <chrono>
#include <base/hex.h>
#include <base/interpolate.h>
@ -185,6 +186,7 @@ namespace ErrorCodes
extern const int CHECKSUM_DOESNT_MATCH;
extern const int NOT_INITIALIZED;
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
extern const int TABLE_IS_DROPPED;
}
namespace ActionLocks
@ -3921,7 +3923,10 @@ void StorageReplicatedMergeTree::startBeingLeader()
void StorageReplicatedMergeTree::stopBeingLeader()
{
if (!is_leader)
{
LOG_TRACE(log, "stopBeingLeader called but we are not a leader already");
return;
}
LOG_INFO(log, "Stopped being leader");
is_leader = false;
@ -3978,6 +3983,153 @@ String StorageReplicatedMergeTree::findReplicaHavingPart(const String & part_nam
return {};
}
void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info)
{
{
std::lock_guard lock(last_sent_parts_mutex);
last_sent_parts.emplace_back(info);
static constexpr size_t LAST_SENT_PARTS_WINDOW_SIZE = 1000;
while (last_sent_parts.size() > LAST_SENT_PARTS_WINDOW_SIZE)
last_sent_parts.pop_front();
}
last_sent_parts_cv.notify_all();
}
void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_)
{
/// Will be true in case shutdown is called from a query (DROP/DETACH)
if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr)
{
LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query");
return;
}
if (!shutdown_called.load())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug");
auto settings_ptr = getSettings();
auto wait_ms = settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds();
if (wait_ms == 0)
{
LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero");
return;
}
if (shutdown_deadline_ <= std::chrono::system_clock::now())
{
LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because shutdown_deadline already passed");
return;
}
auto zookeeper = getZooKeeperIfTableShutDown();
auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log);
if (unique_parts_set.empty())
{
LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts");
return;
}
else
{
LOG_INFO(log, "Will wait for {} unique parts to be fetched", unique_parts_set.size());
}
auto wait_predicate = [&] () -> bool
{
for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();)
{
const auto & part = *it;
bool found = false;
for (const auto & sent_part : last_sent_parts | std::views::reverse)
{
if (sent_part.contains(part))
{
LOG_TRACE(log, "Part {} was fetched by some replica", part.getPartNameForLogs());
found = true;
it = unique_parts_set.erase(it);
break;
}
}
if (!found)
break;
}
return unique_parts_set.empty();
};
std::unique_lock lock(last_sent_parts_mutex);
if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate))
LOG_INFO(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size());
else
LOG_INFO(log, "Successfully waited all the parts");
}
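The wait above is the classic condition_variable idiom: an absolute deadline plus a predicate that shrinks a set as notifications arrive (here, as parts are reported sent). A minimal generic sketch of the same idiom, with hypothetical producer/consumer names standing in for addLastSentPart and the unique-parts set:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <set>
#include <thread>

int main()
{
    std::mutex mutex;
    std::condition_variable cv;
    std::set<int> pending{1, 2, 3};  // stands in for unique_parts_set

    // Producer: simulates other replicas fetching our parts one by one.
    std::thread producer([&]
    {
        for (int part : {1, 2, 3})
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(50));
            {
                std::lock_guard lock(mutex);
                pending.erase(part);  // stands in for addLastSentPart + the predicate's erase
            }
            cv.notify_all();
        }
    });

    const auto deadline = std::chrono::system_clock::now() + std::chrono::seconds(1);

    std::unique_lock lock(mutex);
    const bool all_fetched = cv.wait_until(lock, deadline, [&] { return pending.empty(); });
    std::cout << (all_fetched ? "all parts fetched" : "deadline passed, parts remain") << '\n';

    lock.unlock();
    producer.join();
    return 0;
}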
std::set<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_)
{
if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active"))
{
LOG_INFO(log_, "Our replica is not active, nobody will try to fetch anything");
return {};
}
Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas");
Strings our_parts;
std::vector<ActiveDataPartSet> data_parts_on_replicas;
for (const String & replica : replicas)
{
if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active"))
{
LOG_TRACE(log_, "Replica {} is not active, skipping", replica);
continue;
}
Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / replica / "parts");
if (replica == replica_name_)
{
LOG_TRACE(log_, "Our replica parts collected {}", replica);
our_parts = parts;
}
else
{
LOG_TRACE(log_, "Fetching parts for replica {}: [{}]", replica, fmt::join(parts, ", "));
data_parts_on_replicas.emplace_back(format_version_, parts);
}
}
if (data_parts_on_replicas.empty())
{
LOG_TRACE(log_, "Has no active replicas, will no try to wait for fetch");
return {};
}
std::set<MergeTreePartInfo> our_unique_parts;
for (const auto & part : our_parts)
{
bool found = false;
for (const auto & active_parts_set : data_parts_on_replicas)
{
if (!active_parts_set.getContainingPart(part).empty())
{
found = true;
break;
}
}
if (!found)
{
LOG_TRACE(log_, "Part not {} found on other replicas", part);
our_unique_parts.emplace(MergeTreePartInfo::fromPartName(part, format_version_));
}
}
if (!our_parts.empty() && our_unique_parts.empty())
LOG_TRACE(log_, "All parts found on replicas");
return our_unique_parts;
}
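findReplicaUniqueParts boils down to: a part is unique if no other active replica has a part covering it. A simplified sketch where parts are plain strings and "covering" is just equality (the real code uses ActiveDataPartSet::getContainingPart to handle merged ranges); the names here are illustrative:

#include <iostream>
#include <set>
#include <string>
#include <vector>

std::set<std::string> findUnique(const std::vector<std::string> & ours,
                                 const std::vector<std::set<std::string>> & others)
{
    std::set<std::string> unique;
    for (const auto & part : ours)
    {
        bool found = false;
        for (const auto & replica_parts : others)
        {
            if (replica_parts.count(part))
            {
                found = true;
                break;
            }
        }
        if (!found)
            unique.insert(part);  // nobody else has it, we must wait for it to be fetched
    }
    return unique;
}

int main()
{
    const std::vector<std::string> ours{"all_0_0_0", "all_1_1_0", "all_2_2_0"};
    const std::vector<std::set<std::string>> others{{"all_0_0_0"}, {"all_1_1_0"}};
    for (const auto & part : findUnique(ours, others))
        std::cout << part << " is unique to this replica\n";  // prints all_2_2_0
    return 0;
}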
String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(LogEntry & entry, bool active)
{
auto zookeeper = getZooKeeper();
@ -4637,6 +4789,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart(
void StorageReplicatedMergeTree::startup()
{
LOG_TRACE(log, "Starting up table");
startOutdatedDataPartsLoadingTask();
if (attach_thread)
{
@ -4658,6 +4811,8 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
since_metadata_err_incr_readonly_metric = true;
CurrentMetrics::add(CurrentMetrics::ReadonlyReplica);
}
LOG_TRACE(log, "No connection to ZooKeeper or no metadata in ZooKeeper, will not startup");
return;
}
@ -4692,6 +4847,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
if (from_attach_thread)
{
LOG_TRACE(log, "Trying to startup table from right now");
/// Try activating replica in current thread.
restarting_thread.run();
}
@ -4701,9 +4857,18 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
/// NOTE It does not mean that replication is actually started after receiving this event.
/// It only means that an attempt to startup replication was made.
/// Table may be still in readonly mode if this attempt failed for any reason.
startup_event.wait();
while (!startup_event.tryWait(10 * 1000))
LOG_TRACE(log, "Waiting for RestartingThread to startup table");
}
auto lock = std::unique_lock<std::mutex>(flush_and_shutdown_mutex, std::defer_lock);
do
{
if (shutdown_prepared_called.load() || shutdown_called.load())
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped");
}
while (!lock.try_lock());
/// And this is just a callback
session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]()
{
@ -4744,6 +4909,37 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread)
}
void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
{
std::lock_guard lock{flush_and_shutdown_mutex};
if (shutdown_prepared_called.exchange(true))
return;
try
{
auto settings_ptr = getSettings();
/// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
fetcher.blocker.cancelForever();
merger_mutator.merges_blocker.cancelForever();
parts_mover.moves_blocker.cancelForever();
stopBeingLeader();
if (attach_thread)
attach_thread->shutdown();
restarting_thread.shutdown(/* part_of_full_shutdown */true);
/// Explicitly set the event, because the restarting thread will not set it again
startup_event.set();
shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds()));
}
catch (...)
{
/// Don't wait anything in case of improper prepare for shutdown
shutdown_deadline.emplace(std::chrono::system_clock::now());
throw;
}
}
void StorageReplicatedMergeTree::partialShutdown()
{
ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown);
@ -4779,21 +4975,28 @@ void StorageReplicatedMergeTree::shutdown()
if (shutdown_called.exchange(true))
return;
flushAndPrepareForShutdown();
if (!shutdown_deadline.has_value())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown");
try
{
waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline);
}
catch (const Exception & ex)
{
if (ex.code() == ErrorCodes::LOGICAL_ERROR)
throw;
tryLogCurrentException(log, __PRETTY_FUNCTION__);
}
session_expired_callback_handler.reset();
stopOutdatedDataPartsLoadingTask();
/// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
fetcher.blocker.cancelForever();
merger_mutator.merges_blocker.cancelForever();
parts_mover.moves_blocker.cancelForever();
mutations_finalizing_task->deactivate();
stopBeingLeader();
partialShutdown();
if (attach_thread)
attach_thread->shutdown();
restarting_thread.shutdown(/* part_of_full_shutdown */true);
background_operations_assignee.finish();
part_moves_between_shards_orchestrator.shutdown();
{
@ -6167,7 +6370,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry(
const auto & stop_waiting = [&]()
{
bool stop_waiting_itself = waiting_itself && partial_shutdown_called;
bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called);
bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds();
bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded)
&& !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active");

View File

@ -112,8 +112,35 @@ public:
bool need_check_structure);
void startup() override;
void shutdown() override;
/// Too many shutdown methods...
///
/// Partial shutdown is called if we lose the connection to ZooKeeper.
/// The table can also recover after a partial shutdown and continue
/// to work. This method can be called repeatedly.
void partialShutdown();
/// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown).
/// The shutdown process is split into two methods to make it more soft and fast. In database shutdown()
/// looks like:
/// for (table : tables)
/// table->flushAndPrepareForShutdown()
///
/// for (table : tables)
/// table->shutdown()
///
/// So we first stop producing parts for all tables (a fast operation). And only after that we wait in shutdown()
/// for other replicas to download parts.
///
/// In flushAndPrepareForShutdown we cancel all part-producing operations:
/// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will
/// call it (defensive programming).
void flushAndPrepareForShutdown() override;
/// In shutdown we completely terminate the table -- remove the
/// is_active node and the interserver handler. Also optionally
/// wait until other replicas download some parts from our replica.
void shutdown() override;
~StorageReplicatedMergeTree() override;
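A compact illustration of the two-phase protocol described in the comment above, with a hypothetical Table type rather than the real IStorage interface:

#include <iostream>
#include <memory>
#include <vector>

// Sketch only: stands in for a table with the two shutdown phases.
struct Table
{
    virtual ~Table() = default;
    virtual void flushAndPrepareForShutdown() { std::cout << "prepare\n"; }  // fast: stop producing parts
    virtual void shutdown() { std::cout << "shutdown\n"; }                   // slow: may wait for other replicas
};

int main()
{
    std::vector<std::unique_ptr<Table>> tables;
    tables.emplace_back(std::make_unique<Table>());
    tables.emplace_back(std::make_unique<Table>());

    // Phase 1: every table quickly stops merges/fetches/moves.
    for (auto & table : tables)
        table->flushAndPrepareForShutdown();

    // Phase 2: only now perform the potentially long waits (e.g. for unique parts to be fetched).
    for (auto & table : tables)
        table->shutdown();
    return 0;
}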
static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config);
@ -340,6 +367,13 @@ public:
/// Get a sequential consistent view of current parts.
ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const;
void addLastSentPart(const MergeTreePartInfo & info);
/// Wait the required number of milliseconds to give other replicas a chance to
/// download unique parts from our replica
using ShutdownDeadline = std::chrono::time_point<std::chrono::system_clock>;
void waitForUniquePartsToBeFetchedByOtherReplicas(ShutdownDeadline shutdown_deadline);
private:
std::atomic_bool are_restoring_replica {false};
@ -444,9 +478,19 @@ private:
Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET
std::atomic<bool> shutdown_called {false};
std::atomic<bool> flush_called {false};
std::atomic<bool> shutdown_prepared_called {false};
std::optional<ShutdownDeadline> shutdown_deadline;
/// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now
mutable std::mutex flush_and_shutdown_mutex;
mutable std::mutex last_sent_parts_mutex;
std::condition_variable last_sent_parts_cv;
std::deque<MergeTreePartInfo> last_sent_parts;
/// Threads.
///
/// A task that keeps track of the updates in the logs of all replicas and loads them into the queue.
bool queue_update_in_progress = false;
@ -729,6 +773,7 @@ private:
*/
String findReplicaHavingCoveringPart(LogEntry & entry, bool active);
String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name);
static std::set<MergeTreePartInfo> findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_);
/** Download the specified part from the specified replica.
* If `to_detached`, the part is placed in the `detached` directory.

View File

@ -29,7 +29,6 @@
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/StorageURL.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ReadFromStorageProgress.h>
#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
@ -148,7 +147,8 @@ public:
const Block & virtual_header_,
ContextPtr context_,
KeysWithInfo * read_keys_,
const S3Settings::RequestSettings & request_settings_)
const S3Settings::RequestSettings & request_settings_,
std::function<void(FileProgress)> file_progress_callback_)
: WithContext(context_)
, client(client_.clone())
, globbed_uri(globbed_uri_)
@ -158,6 +158,7 @@ public:
, request_settings(request_settings_)
, list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1)
, list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
, file_progress_callback(file_progress_callback_)
{
if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
@ -194,11 +195,6 @@ public:
return nextAssumeLocked();
}
size_t getTotalSize() const
{
return total_size.load(std::memory_order_relaxed);
}
~Impl()
{
list_objects_pool.wait();
@ -312,15 +308,19 @@ private:
buffer.reserve(block.rows());
for (UInt64 idx : idxs.getData())
{
total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed);
if (file_progress_callback)
file_progress_callback(FileProgress(0, temp_buffer[idx].info->size));
buffer.emplace_back(std::move(temp_buffer[idx]));
}
}
else
{
buffer = std::move(temp_buffer);
for (const auto & [_, info] : buffer)
total_size.fetch_add(info->size, std::memory_order_relaxed);
if (file_progress_callback)
{
for (const auto & [_, info] : buffer)
file_progress_callback(FileProgress(0, info->size));
}
}
/// Set iterator only after the whole batch is processed
@ -381,7 +381,7 @@ private:
ThreadPool list_objects_pool;
ThreadPoolCallbackRunner<ListObjectsOutcome> list_objects_scheduler;
std::future<ListObjectsOutcome> outcome_future;
std::atomic<size_t> total_size = 0;
std::function<void(FileProgress)> file_progress_callback;
};
StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
@ -391,8 +391,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
const Block & virtual_header,
ContextPtr context,
KeysWithInfo * read_keys_,
const S3Settings::RequestSettings & request_settings_)
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_))
const S3Settings::RequestSettings & request_settings_,
std::function<void(FileProgress)> file_progress_callback_)
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_, file_progress_callback_))
{
}
@ -401,11 +402,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next()
return pimpl->next();
}
size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const
{
return pimpl->getTotalSize();
}
class StorageS3Source::KeysIterator::Impl : WithContext
{
public:
@ -418,23 +414,26 @@ public:
ASTPtr query_,
const Block & virtual_header_,
ContextPtr context_,
bool need_total_size,
KeysWithInfo * read_keys_)
KeysWithInfo * read_keys_,
std::function<void(FileProgress)> file_progress_callback_)
: WithContext(context_)
, keys(keys_)
, client(client_.clone())
, version_id(version_id_)
, bucket(bucket_)
, request_settings(request_settings_)
, query(query_)
, virtual_header(virtual_header_)
, file_progress_callback(file_progress_callback_)
{
Strings all_keys = keys_;
/// Create a virtual block with one row to construct filter
if (query && virtual_header && !all_keys.empty())
if (query && virtual_header && !keys.empty())
{
/// Append "idx" column as the filter result
virtual_header.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
auto block = virtual_header.cloneEmpty();
addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0);
addPathToVirtualColumns(block, fs::path(bucket) / keys.front(), 0);
ASTPtr filter_ast;
VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast);
@ -442,8 +441,8 @@ public:
if (filter_ast)
{
block = virtual_header.cloneEmpty();
for (size_t i = 0; i < all_keys.size(); ++i)
addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i);
for (size_t i = 0; i < keys.size(); ++i)
addPathToVirtualColumns(block, fs::path(bucket) / keys[i], i);
VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast);
const auto & idxs = typeid_cast<const ColumnUInt64 &>(*block.getByName("_idx").column);
@ -451,29 +450,17 @@ public:
Strings filtered_keys;
filtered_keys.reserve(block.rows());
for (UInt64 idx : idxs.getData())
filtered_keys.emplace_back(std::move(all_keys[idx]));
filtered_keys.emplace_back(std::move(keys[idx]));
all_keys = std::move(filtered_keys);
keys = std::move(filtered_keys);
}
}
for (auto && key : all_keys)
{
std::optional<S3::ObjectInfo> info;
/// In case all_keys.size() > 1, avoid getting object info now
/// (it will be done anyway eventually, but with delay and in parallel).
/// But progress bar will not work in this case.
if (need_total_size && all_keys.size() == 1)
{
info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_);
total_size += info->size;
}
keys.emplace_back(std::move(key), std::move(info));
}
if (read_keys_)
*read_keys_ = keys;
{
for (const auto & key : keys)
read_keys_->push_back({key, {}});
}
}
KeyWithInfo next()
@ -481,24 +468,27 @@ public:
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= keys.size())
return {};
auto key = keys[current_index];
std::optional<S3::ObjectInfo> info;
if (file_progress_callback)
{
info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings);
file_progress_callback(FileProgress(0, info->size));
}
return keys[current_index];
}
size_t getTotalSize() const
{
return total_size;
return {key, info};
}
private:
KeysWithInfo keys;
Strings keys;
std::atomic_size_t index = 0;
std::unique_ptr<S3::Client> client;
String version_id;
String bucket;
S3Settings::RequestSettings request_settings;
ASTPtr query;
Block virtual_header;
size_t total_size = 0;
std::function<void(FileProgress)> file_progress_callback;
};
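The rewritten KeysIterator::Impl defers object metadata lookup to next(), and only performs it when a progress callback is installed, instead of stat-ing every key up front. A small sketch of that lazy pattern with a hypothetical statObject helper (not the real S3::getObjectInfo signature):

#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct ObjectInfo { uint64_t size; };

// Hypothetical stand-in for an object-store metadata request.
ObjectInfo statObject(const std::string & key) { return ObjectInfo{static_cast<uint64_t>(key.size() * 100)}; }

int main()
{
    const std::vector<std::string> keys{"a.csv", "b.csv"};
    std::function<void(uint64_t)> progress = [](uint64_t total) { std::cout << "announce " << total << " bytes\n"; };

    size_t index = 0;
    // next(): metadata is fetched lazily, one key at a time, only when needed.
    while (index < keys.size())
    {
        const std::string & key = keys[index++];
        std::optional<ObjectInfo> info;
        if (progress)  // mirrors `if (file_progress_callback)` above
        {
            info = statObject(key);
            progress(info->size);
        }
        std::cout << key << (info ? " (size known)" : " (size deferred)") << '\n';
    }
    return 0;
}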
StorageS3Source::KeysIterator::KeysIterator(
@ -510,11 +500,11 @@ StorageS3Source::KeysIterator::KeysIterator(
ASTPtr query,
const Block & virtual_header,
ContextPtr context,
bool need_total_size,
KeysWithInfo * read_keys)
KeysWithInfo * read_keys,
std::function<void(FileProgress)> file_progress_callback_)
: pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(
client_, version_id_, keys_, bucket_, request_settings_,
query, virtual_header, context, need_total_size, read_keys))
query, virtual_header, context, read_keys, file_progress_callback_))
{
}
@ -523,11 +513,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next()
return pimpl->next();
}
size_t StorageS3Source::KeysIterator::getTotalSize() const
{
return pimpl->getTotalSize();
}
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
for (const auto & virtual_column : requested_virtual_columns)
@ -552,7 +537,7 @@ StorageS3Source::StorageS3Source(
const String & version_id_,
std::shared_ptr<IIterator> file_iterator_,
const size_t download_thread_num_)
: ISource(getHeader(sample_block_, requested_virtual_columns_))
: ISource(getHeader(sample_block_, requested_virtual_columns_), false)
, WithContext(context_)
, name(std::move(name_))
, bucket(bucket_)
@ -573,10 +558,7 @@ StorageS3Source::StorageS3Source(
{
reader = createReader();
if (reader)
{
total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
reader_future = createReaderAsync();
}
}
StorageS3Source::ReaderHolder StorageS3Source::createReader()
@ -614,7 +596,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader()
auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto current_reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)};
return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)};
}
std::future<StorageS3Source::ReaderHolder> StorageS3Source::createReaderAsync()
@ -713,17 +695,11 @@ Chunk StorageS3Source::generate()
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
const auto & file_path = reader.getPath();
if (num_rows && total_objects_size)
{
size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
for (const auto & virtual_column : requested_virtual_columns)
{
if (virtual_column.name == "_path")
@ -748,13 +724,6 @@ Chunk StorageS3Source::generate()
if (!reader)
break;
size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
/// Adjust total_rows_approx_accumulated with new total size.
if (total_objects_size)
total_rows_approx_accumulated = static_cast<size_t>(
std::ceil(static_cast<double>(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated));
total_objects_size += object_size;
/// Even if task is finished the thread may be not freed in pool.
/// So wait until it will be freed before scheduling a new task.
create_reader_pool.wait();
@ -1005,8 +974,8 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
bool need_total_size,
KeysWithInfo * read_keys)
KeysWithInfo * read_keys,
std::function<void(FileProgress)> file_progress_callback)
{
if (distributed_processing)
{
@ -1017,14 +986,14 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
/// Iterate through disclosed globs and make a source for each file
return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*configuration.client, configuration.url, query, virtual_block,
local_context, read_keys, configuration.request_settings);
local_context, read_keys, configuration.request_settings, file_progress_callback);
}
else
{
return std::make_shared<StorageS3Source::KeysIterator>(
*configuration.client, configuration.url.version_id, configuration.keys,
configuration.url.bucket, configuration.request_settings, query,
virtual_block, local_context, need_total_size, read_keys);
virtual_block, local_context, read_keys, file_progress_callback);
}
}
@ -1074,7 +1043,7 @@ Pipe StorageS3::read(
}
std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
query_configuration, distributed_processing, local_context, query_info.query, virtual_block);
query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback());
ColumnsDescription columns_description;
Block block_for_format;
@ -1476,7 +1445,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
{
KeysWithInfo read_keys;
auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys);
auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys);
std::optional<ColumnsDescription> columns_from_cache;
size_t prev_read_keys_size = read_keys.size();

View File

@ -56,7 +56,6 @@ public:
public:
virtual ~IIterator() = default;
virtual KeyWithInfo next() = 0;
virtual size_t getTotalSize() const = 0;
KeyWithInfo operator ()() { return next(); }
};
@ -71,10 +70,10 @@ public:
const Block & virtual_header,
ContextPtr context,
KeysWithInfo * read_keys_ = nullptr,
const S3Settings::RequestSettings & request_settings_ = {});
const S3Settings::RequestSettings & request_settings_ = {},
std::function<void(FileProgress)> progress_callback_ = {});
KeyWithInfo next() override;
size_t getTotalSize() const override;
private:
class Impl;
@ -94,11 +93,10 @@ public:
ASTPtr query,
const Block & virtual_header,
ContextPtr context,
bool need_total_size = true,
KeysWithInfo * read_keys = nullptr);
KeysWithInfo * read_keys = nullptr,
std::function<void(FileProgress)> progress_callback_ = {});
KeyWithInfo next() override;
size_t getTotalSize() const override;
private:
class Impl;
@ -113,8 +111,6 @@ public:
KeyWithInfo next() override { return {callback(), {}}; }
size_t getTotalSize() const override { return 0; }
private:
ReadTaskCallback callback;
};
@ -168,7 +164,7 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader_)
: path(std::move(path_))
, read_buf(std::move(read_buf_))
, input_format(input_format_)
, input_format(std::move(input_format_))
, pipeline(std::move(pipeline_))
, reader(std::move(reader_))
{
@ -195,15 +191,13 @@ private:
return *this;
}
const std::unique_ptr<ReadBuffer> & getReadBuffer() const { return read_buf; }
const std::shared_ptr<IInputFormat> & getFormat() const { return input_format; }
explicit operator bool() const { return reader != nullptr; }
PullingPipelineExecutor * operator->() { return reader.get(); }
const PullingPipelineExecutor * operator->() const { return reader.get(); }
const String & getPath() const { return path; }
const IInputFormat * getInputFormat() const { return input_format.get(); }
private:
String path;
std::unique_ptr<ReadBuffer> read_buf;
@ -224,11 +218,6 @@ private:
ThreadPoolCallbackRunner<ReaderHolder> create_reader_scheduler;
std::future<ReaderHolder> reader_future;
UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
size_t total_objects_size = 0;
/// Recreate ReadBuffer and Pipeline for each file.
ReaderHolder createReader();
std::future<ReaderHolder> createReaderAsync();
@ -353,8 +342,8 @@ private:
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
bool need_total_size = true,
KeysWithInfo * read_keys = nullptr);
KeysWithInfo * read_keys = nullptr,
std::function<void(FileProgress)> progress_callback = {});
static ColumnsDescription getTableStructureFromDataImpl(
const Configuration & configuration,

View File

@ -88,7 +88,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.url, query, virtual_block, context);
*s3_configuration.client, s3_configuration.url, query, virtual_block, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String { return iterator->next().key; });
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
}

View File

@ -79,11 +79,11 @@ public:
nested->shutdown();
}
void flush() override
void flushAndPrepareForShutdown() override
{
std::lock_guard lock{nested_mutex};
if (nested)
nested->flush();
nested->flushAndPrepareForShutdown();
}
void drop() override

View File

@ -3,7 +3,6 @@
#include <Storages/PartitionedSink.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ReadFromStorageProgress.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/threadPoolCallbackRunner.h>
@ -235,7 +234,7 @@ StorageURLSource::StorageURLSource(
const HTTPHeaderEntries & headers_,
const URIParams & params,
bool glob_url)
: ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_)
: ISource(getHeader(sample_block, requested_virtual_columns_), false), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_)
{
auto headers = getHeaders(headers_);
@ -271,22 +270,11 @@ StorageURLSource::StorageURLSource(
curr_uri = uri_and_buf.first;
read_buf = std::move(uri_and_buf.second);
size_t file_size = 0;
try
if (auto file_progress_callback = context->getFileProgressCallback())
{
file_size = getFileSizeFromReadBuffer(*read_buf);
}
catch (...)
{
// we simply continue without updating total_size
}
if (file_size)
{
/// Adjust total_rows_approx_accumulated with new total size.
if (total_size)
total_rows_approx_accumulated = static_cast<size_t>(std::ceil(static_cast<double>(total_size + file_size) / total_size * total_rows_approx_accumulated));
total_size += file_size;
size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0);
LOG_DEBUG(&Poco::Logger::get("URL"), "Send file size {}", file_size);
file_progress_callback(FileProgress(0, file_size));
}
// TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams.
@ -332,14 +320,8 @@ Chunk StorageURLSource::generate()
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
if (num_rows && total_size)
{
size_t chunk_size = input_format->getApproxBytesReadForChunk();
if (!chunk_size)
chunk_size = chunk.bytes();
updateRowsProgressApprox(
*this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
size_t chunk_size = input_format->getApproxBytesReadForChunk();
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
const String & path{curr_uri.getPath()};

View File

@ -212,11 +212,6 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader;
Poco::Net::HTTPBasicCredentials credentials;
size_t total_size = 0;
UInt64 total_rows_approx_max = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_accumulated = 0;
};
class StorageURLSink : public SinkToStorage

View File

@ -81,7 +81,6 @@
02242_join_rocksdb
02267_join_dup_columns_issue36199
02302_s3_file_pruning
02317_distinct_in_order_optimization_explain
02341_global_join_cte
02345_implicit_transaction
02352_grouby_shadows_arg

Some files were not shown because too many files have changed in this diff.