From 82a849ba8e1b95833d2ae7f3bcdd3e1c008bacb5 Mon Sep 17 00:00:00 2001
From: Artur <613623@mail.ru>
Date: Sat, 18 Sep 2021 09:36:02 +0000
Subject: [PATCH 001/104] add options method
---
programs/server/config.xml | 19 ++++++++++++++++++
src/Interpreters/ClientInfo.h | 5 +++--
src/Server/HTTPHandler.cpp | 37 +++++++++++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 6c98ac740fe..bec51de6126 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -62,6 +62,25 @@
-->
+
+
+ Access-Control-Allow-Origin
+ *
+
+
+ Access-Control-Allow-Headers
+ origin, x-requested-with
+
+
+ Access-Control-Allow-Methods
+ POST, GET, OPTIONS
+
+
+ Access-Control-Max-Age
+ 86400
+
+
+
diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 71570778645..294bf3b426c 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -35,8 +35,9 @@ public:
enum class HTTPMethod : uint8_t
{
UNKNOWN = 0,
- GET = 1,
- POST = 2,
+ GET = 1,
+ POST = 2,
+ OPTIONS = 3
};
enum class QueryKind : uint8_t
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 0492b58dc88..99502261aa9 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -32,6 +32,7 @@
#include
#include
#include
+#include "Server/HTTP/HTTPResponse.h"
#if !defined(ARCADIA_BUILD)
# include
@@ -108,6 +109,37 @@ namespace ErrorCodes
extern const int HTTP_LENGTH_REQUIRED;
}
+namespace
+{
+ /// Process options request. Usefull for CORS.
+ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config)
+ {
+ /// If answer for options request was not defined, return 501 to client.
+ if (!config.has("http_options_response"))
+ {
+ response.setStatusAndReason(HTTPResponse::HTTP_NOT_IMPLEMENTED);
+ response.send();
+ }
+ else
+ {
+ /// otherwise fill response.
+ Strings config_keys;
+ config.keys("http_options_response", config_keys);
+ for (const std::string & config_key : config_keys)
+ {
+ if (config_key == "header" || config_key.starts_with("header["))
+ {
+ response.add(config.getString("http_options_response." + config_key + ".name", "Empty header"),
+ config.getString("http_options_response." + config_key + ".value", ""));
+ response.setKeepAlive(false);
+ }
+ }
+ response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT);
+ response.send();
+ }
+ }
+}
+
static String base64Decode(const String & encoded)
{
String decoded;
@@ -850,6 +882,11 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
try
{
+ if (request.getMethod() == HTTPServerRequest::HTTP_OPTIONS)
+ {
+ processOptionsRequest(response, server.config());
+ return;
+ }
response.setContentType("text/plain; charset=UTF-8");
response.set("X-ClickHouse-Server-Display-Name", server_display_name);
/// For keep-alive to work.
From c8892ec7a71eac73a852ab1b8d200a86148b08c5 Mon Sep 17 00:00:00 2001
From: Artur <613623@mail.ru>
Date: Wed, 22 Sep 2021 10:34:48 +0000
Subject: [PATCH 002/104] add options support
---
src/Server/HTTPHandler.cpp | 19 +++++++++++++------
src/Server/HTTPHandlerFactory.cpp | 4 ++--
src/Server/HTTPHandlerFactory.h | 6 ++++--
3 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 99502261aa9..017bc82a475 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -25,6 +25,7 @@
#include
#include
#include
+#include "common/logger_useful.h"
#include
#include
#include
@@ -111,10 +112,11 @@ namespace ErrorCodes
namespace
{
- /// Process options request. Usefull for CORS.
+ /// Process options request. Useful for CORS.
void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config)
{
- /// If answer for options request was not defined, return 501 to client.
+ /// If response for options request was not defined, return 501 to client.
+ /// TODO should it be here?
if (!config.has("http_options_response"))
{
response.setStatusAndReason(HTTPResponse::HTTP_NOT_IMPLEMENTED);
@@ -129,12 +131,17 @@ namespace
{
if (config_key == "header" || config_key.starts_with("header["))
{
- response.add(config.getString("http_options_response." + config_key + ".name", "Empty header"),
- config.getString("http_options_response." + config_key + ".value", ""));
- response.setKeepAlive(false);
+ /// If there is empty header name, it will not be processed and message about it will be in logs
+ if (config.getString("http_options_response." + config_key + ".name", "").empty())
+ LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed.");
+ else
+ response.add(config.getString("http_options_response." + config_key + ".name", ""),
+ config.getString("http_options_response." + config_key + ".value", ""));
+
}
}
- response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT);
+ response.setKeepAlive(false);
+ response.setStatusAndReason(HTTPResponse::HTTP_OK);
response.send();
}
}
diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp
index 1e3d02b85ab..526b86a5c28 100644
--- a/src/Server/HTTPHandlerFactory.cpp
+++ b/src/Server/HTTPHandlerFactory.cpp
@@ -123,7 +123,7 @@ static inline HTTPRequestHandlerFactoryPtr createInterserverHTTPHandlerFactory(I
addCommonDefaultHandlersFactory(*factory, server);
auto main_handler = std::make_shared>(server);
- main_handler->allowPostAndGetParamsRequest();
+ main_handler->allowPostAndGetParamsAndOptionsRequest();
factory->addHandler(main_handler);
return factory;
@@ -180,7 +180,7 @@ void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer
addCommonDefaultHandlersFactory(factory, server);
auto query_handler = std::make_shared>(server, "query");
- query_handler->allowPostAndGetParamsRequest();
+ query_handler->allowPostAndGetParamsAndOptionsRequest();
factory.addHandler(query_handler);
/// We check that prometheus handler will be served on current (default) port.
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index 6297f988eaa..5497d585d43 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -104,11 +104,13 @@ public:
}
/// Handle POST or GET with params
- void allowPostAndGetParamsRequest()
+ void allowPostAndGetParamsAndOptionsRequest()
{
addFilter([](const auto & request)
{
- return request.getURI().find('?') != std::string::npos
+ return (request.getURI().find('?') != std::string::npos
+ && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
+ || request.getMethod() == Poco::Net::HTTPRequest::HTTP_OPTIONS
|| request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST;
});
}
From 2cffa98a60677517a372f72a1fea746a19bb0328 Mon Sep 17 00:00:00 2001
From: Artur <613623@mail.ru>
Date: Wed, 22 Sep 2021 12:22:21 +0000
Subject: [PATCH 003/104] add test and comments in config
---
programs/server/config.xml | 6 ++++--
src/Server/HTTPHandler.cpp | 14 ++++---------
tests/config/config.d/CORS.xml | 20 +++++++++++++++++++
.../02029_test_options_requests.reference | 5 +++++
.../02029_test_options_requests.sh | 8 ++++++++
5 files changed, 41 insertions(+), 12 deletions(-)
create mode 100644 tests/config/config.d/CORS.xml
create mode 100644 tests/queries/0_stateless/02029_test_options_requests.reference
create mode 100755 tests/queries/0_stateless/02029_test_options_requests.sh
diff --git a/programs/server/config.xml b/programs/server/config.xml
index bec51de6126..26c3107e972 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -62,7 +62,9 @@
-->
-
+
+
+
random
+
+
+ 1
diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
index a4fe3649e6f..5bc10841726 100644
--- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
@@ -29,9 +29,6 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders()
{
headers_started_sending = true;
- if (add_cors_header)
- response.set("Access-Control-Allow-Origin", "*");
-
setResponseDefaultHeaders(response, keep_alive_timeout);
if (!is_http_method_head)
diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
index b4ff454195f..7cebf5ca770 100644
--- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
@@ -36,7 +36,6 @@ private:
HTTPServerResponse & response;
bool is_http_method_head;
- bool add_cors_header = false;
unsigned keep_alive_timeout = 0;
bool compress = false;
CompressionMethod compression_method;
@@ -103,13 +102,6 @@ public:
compression_level = level;
}
- /// Turn CORS on or off.
- /// The setting has any effect only if HTTP headers haven't been sent yet.
- void addHeaderCORS(bool enable_cors)
- {
- add_cors_header = enable_cors;
- }
-
/// Don't send HTTP headers with progress more frequently.
void setSendProgressInterval(size_t send_progress_interval_ms_)
{
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index cec7e1c8b3d..c27d5343e90 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -33,7 +33,7 @@
#include
#include
#include
-#include "Server/HTTP/HTTPResponse.h"
+#include
#if !defined(ARCADIA_BUILD)
# include
@@ -112,33 +112,41 @@ namespace ErrorCodes
namespace
{
- /// Process options request. Useful for CORS.
- void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config)
+bool tryAddHeadersFromConfig(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config)
+{
+ if (config.has("http_options_response"))
{
- /// If there is information for options request in cofing, fill response.
- /// For this purpose find all headers related to http_options_response and add them with their values to response
- if (config.has("http_options_response"))
+ Strings config_keys;
+ config.keys("http_options_response", config_keys);
+ for (const std::string & config_key : config_keys)
{
- Strings config_keys;
- config.keys("http_options_response", config_keys);
- for (const std::string & config_key : config_keys)
+ if (config_key == "header" || config_key.starts_with("header["))
{
- if (config_key == "header" || config_key.starts_with("header["))
- {
- /// If there is empty header name, it will not be processed and message about it will be in logs
- if (config.getString("http_options_response." + config_key + ".name", "").empty())
- LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed.");
- else
- response.add(config.getString("http_options_response." + config_key + ".name", ""),
- config.getString("http_options_response." + config_key + ".value", ""));
+ /// If there is empty header name, it will not be processed and message about it will be in logs
+ if (config.getString("http_options_response." + config_key + ".name", "").empty())
+ LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed.");
+ else
+ response.add(config.getString("http_options_response." + config_key + ".name", ""),
+ config.getString("http_options_response." + config_key + ".value", ""));
- }
}
- response.setKeepAlive(false);
- response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT);
- response.send();
}
+ return true;
}
+ return false;
+}
+
+/// Process options request. Useful for CORS.
+void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config)
+{
+    /// If at least one header could be added from the config, complete and send the response.
+ if (tryAddHeadersFromConfig(response, config))
+ {
+ response.setKeepAlive(false);
+ response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT);
+ response.send();
+ }
+}
}
static String base64Decode(const String & encoded)
@@ -739,9 +747,10 @@ void HTTPHandler::processQuery(
if (in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress)
static_cast(*in_post_maybe_compressed).disableChecksumming();
- /// Add CORS header if 'add_http_cors_header' setting is turned on and the client passed
- /// Origin header.
- used_output.out->addHeaderCORS(settings.add_http_cors_header && !request.get("Origin", "").empty());
+ /// Add CORS header if 'add_http_cors_header' setting is turned on or config has http_options_response,
+ /// which means that there are some headers to be sent, and the client passed Origin header.
+ if ((settings.add_http_cors_header || config.has("http_options_response")) && !request.get("Origin", "").empty())
+ tryAddHeadersFromConfig(response, config);
auto append_callback = [context] (ProgressCallback callback)
{
From ce4193fe957367d28da59e6c94fc54aefb3038db Mon Sep 17 00:00:00 2001
From: Artur <613623@mail.ru>
Date: Wed, 22 Sep 2021 17:10:15 +0000
Subject: [PATCH 007/104] small refactoring
---
src/Server/HTTPHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index c27d5343e90..046e7bdfaad 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -749,7 +749,7 @@ void HTTPHandler::processQuery(
/// Add CORS header if 'add_http_cors_header' setting is turned on or config has http_options_response,
/// which means that there are some headers to be sent, and the client passed Origin header.
- if ((settings.add_http_cors_header || config.has("http_options_response")) && !request.get("Origin", "").empty())
+ if (settings.add_http_cors_header && config.has("http_options_response") && !request.get("Origin", "").empty())
tryAddHeadersFromConfig(response, config);
auto append_callback = [context] (ProgressCallback callback)
From 7bbd08cb5d4c90357fc23b0cbfe96f36cfecff33 Mon Sep 17 00:00:00 2001
From: Filatenkov Artur <58165623+FArthur-cmd@users.noreply.github.com>
Date: Fri, 24 Sep 2021 15:40:27 +0300
Subject: [PATCH 008/104] Update HTTPHandler.cpp
---
src/Server/HTTPHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index bd4452ac6cb..7357c56ad2e 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -125,7 +125,7 @@ bool tryAddHeadersFromConfig(HTTPServerResponse & response, const Poco::Util::La
/// If there is empty header name, it will not be processed and message about it will be in logs
if (config.getString("http_options_response." + config_key + ".name", "").empty())
LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed.");
- else
+ else
response.add(config.getString("http_options_response." + config_key + ".name", ""),
config.getString("http_options_response." + config_key + ".value", ""));
From fcebf7b9853452caaffc39d91a31d19ae55a45ba Mon Sep 17 00:00:00 2001
From: Artur <613623@mail.ru>
Date: Wed, 29 Sep 2021 11:29:24 +0000
Subject: [PATCH 009/104] correct tests
---
src/Server/HTTPHandlerFactory.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index e81955ef2b2..f6d96189d92 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -108,8 +108,7 @@ public:
{
addFilter([](const auto & request)
{
- return (request.getURI().find('?') != std::string::npos
- && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
+ return request.getURI().find('?') != std::string::npos
|| request.getMethod() == Poco::Net::HTTPRequest::HTTP_OPTIONS
|| request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST;
});
From 36b699659e466c1deaf4737f973adcfc95fe378b Mon Sep 17 00:00:00 2001
From: Filatenkov Artur <58165623+FArthur-cmd@users.noreply.github.com>
Date: Wed, 29 Sep 2021 14:32:04 +0300
Subject: [PATCH 010/104] Update CORS.xml
---
tests/config/config.d/CORS.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/config/config.d/CORS.xml b/tests/config/config.d/CORS.xml
index 9dd7d402416..873821478dc 100644
--- a/tests/config/config.d/CORS.xml
+++ b/tests/config/config.d/CORS.xml
@@ -1,4 +1,4 @@
-
+
Access-Control-Allow-Origin
@@ -17,5 +17,5 @@
86400
-
+
From cac28833d247617804627e3059589da17c09de1d Mon Sep 17 00:00:00 2001
From: Artur Filatenkov <613623@mail.ru>
Date: Wed, 29 Sep 2021 18:54:04 +0300
Subject: [PATCH 011/104] apply added config in tests
---
tests/config/install.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/config/install.sh b/tests/config/install.sh
index df62cba0ea9..936c44a4e7b 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -37,6 +37,7 @@ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/logger.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/
@@ -57,7 +58,6 @@ ln -sf $SRC_PATH/strings_dictionary.xml $DEST_SERVER_PATH/
ln -sf $SRC_PATH/decimals_dictionary.xml $DEST_SERVER_PATH/
ln -sf $SRC_PATH/executable_dictionary.xml $DEST_SERVER_PATH/
ln -sf $SRC_PATH/executable_pool_dictionary.xml $DEST_SERVER_PATH/
-ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/
ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/
From 68f8b9d235e7417537e4066fb864a71dd8149fd0 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 29 Sep 2021 20:45:01 +0300
Subject: [PATCH 012/104] Update ColumnGathererStream
---
src/DataStreams/ColumnGathererStream.cpp | 164 ++++++++++++------
src/DataStreams/ColumnGathererStream.h | 98 ++++++-----
.../MergeTree/IMergedBlockOutputStream.h | 4 +-
src/Storages/MergeTree/MergeTask.cpp | 5 -
.../MergeTree/MergedBlockOutputStream.cpp | 5 -
.../MergeTree/MergedBlockOutputStream.h | 7 +-
.../MergedColumnOnlyOutputStream.cpp | 5 -
.../MergeTree/MergedColumnOnlyOutputStream.h | 5 +-
8 files changed, 168 insertions(+), 125 deletions(-)
diff --git a/src/DataStreams/ColumnGathererStream.cpp b/src/DataStreams/ColumnGathererStream.cpp
index 683b8012efe..90da7792c21 100644
--- a/src/DataStreams/ColumnGathererStream.cpp
+++ b/src/DataStreams/ColumnGathererStream.cpp
@@ -18,97 +18,151 @@ namespace ErrorCodes
}
ColumnGathererStream::ColumnGathererStream(
- const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_,
- size_t block_preferred_size_)
- : column_name(column_name_), sources(source_streams.size()), row_sources_buf(row_sources_buf_)
- , block_preferred_size(block_preferred_size_), log(&Poco::Logger::get("ColumnGathererStream"))
+ size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_)
+ : sources(num_inputs), row_sources_buf(row_sources_buf_)
+ , block_preferred_size(block_preferred_size_)
{
- if (source_streams.empty())
+ if (num_inputs == 0)
throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED);
+}
- children.assign(source_streams.begin(), source_streams.end());
-
- for (size_t i = 0; i < children.size(); ++i)
+void ColumnGathererStream::initialize(Inputs inputs)
+{
+ for (size_t i = 0; i < inputs.size(); ++i)
{
- const Block & header = children[i]->getHeader();
-
- /// Sometimes MergeTreeReader injects additional column with partitioning key
- if (header.columns() > 2)
- throw Exception(
- "Block should have 1 or 2 columns, but contains " + toString(header.columns()),
- ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
-
- if (i == 0)
+ if (inputs[i].chunk)
{
- column.name = column_name;
- column.type = header.getByName(column_name).type;
- column.column = column.type->createColumn();
+ sources[i].update(inputs[i].chunk.detachColumns().at(0));
+ if (!result_column)
+ result_column = sources[i].column->cloneEmpty();
}
- else if (header.getByName(column_name).column->getName() != column.column->getName())
- throw Exception("Column types don't match", ErrorCodes::INCOMPATIBLE_COLUMNS);
}
}
-
-Block ColumnGathererStream::readImpl()
+IMergingAlgorithm::Status ColumnGathererStream::merge()
{
+ /// Nothing to read after initialize.
+ if (!result_column)
+ return Status(Chunk(), true);
+
+ if (source_to_fully_copy) /// Was set on a previous iteration
+ {
+ Chunk res;
+ res.addColumn(source_to_fully_copy->column);
+ merged_rows += source_to_fully_copy->size;
+ source_to_fully_copy->pos = source_to_fully_copy->size;
+ source_to_fully_copy = nullptr;
+ return Status(std::move(res));
+ }
+
/// Special case: single source and there are no skipped rows
- if (children.size() == 1 && row_sources_buf.eof() && !source_to_fully_copy)
- return children[0]->read();
+ /// Note: looks like this should never happen because row_sources_buf cannot just skip row info.
+ if (sources.size() == 1 && row_sources_buf.eof())
+ {
+ if (sources.front().pos < sources.front().size)
+ {
+ next_required_source = 0;
+ Chunk res;
+ merged_rows += sources.front().column->size();
+ merged_bytes += sources.front().column->allocatedBytes();
+ res.addColumn(std::move(sources.front().column));
+ sources.front().pos = sources.front().size = 0;
+ return Status(std::move(res));
+ }
- if (!source_to_fully_copy && row_sources_buf.eof())
- return Block();
+ if (next_required_source == -1)
+ return Status(Chunk(), true);
- MutableColumnPtr output_column = column.column->cloneEmpty();
- output_block = Block{column.cloneEmpty()};
- /// Surprisingly this call may directly change output_block, bypassing
+ next_required_source = 0;
+ return Status(next_required_source);
+ }
+
+ if (next_required_source != -1 && sources[next_required_source].size == 0)
+ throw Exception("Cannot fetch required block. Source " + toString(next_required_source), ErrorCodes::RECEIVED_EMPTY_DATA);
+
+ /// Surprisingly this call may directly change some internal state of ColumnGathererStream.
/// output_column. See ColumnGathererStream::gather.
- output_column->gather(*this);
- if (!output_column->empty())
- output_block.getByPosition(0).column = std::move(output_column);
+ result_column->gather(*this);
- return output_block;
+ if (next_required_source != -1)
+ return Status(next_required_source);
+
+ if (source_to_fully_copy && result_column->empty())
+ {
+ Chunk res;
+ merged_rows += source_to_fully_copy->column->size();
+ merged_bytes += source_to_fully_copy->column->allocatedBytes();
+ res.addColumn(source_to_fully_copy->column);
+ source_to_fully_copy->pos = source_to_fully_copy->size;
+ source_to_fully_copy = nullptr;
+ return Status(std::move(res));
+ }
+
+ auto col = result_column->cloneEmpty();
+ result_column.swap(col);
+
+ Chunk res;
+ merged_rows += col->size();
+ merged_bytes += col->allocatedBytes();
+ res.addColumn(std::move(col));
+ return Status(std::move(res), row_sources_buf.eof());
}
-void ColumnGathererStream::fetchNewBlock(Source & source, size_t source_num)
+void ColumnGathererStream::consume(Input & input, size_t source_num)
{
- try
- {
- source.block = children[source_num]->read();
- source.update(column_name);
- }
- catch (Exception & e)
- {
- e.addMessage("Cannot fetch required block. Stream " + children[source_num]->getName() + ", part " + toString(source_num));
- throw;
- }
+ auto & source = sources[source_num];
+ if (input.chunk)
+ source.update(input.chunk.getColumns().at(0));
if (0 == source.size)
{
- throw Exception("Fetched block is empty. Stream " + children[source_num]->getName() + ", part " + toString(source_num),
+ throw Exception("Fetched block is empty. Source " + toString(source_num),
ErrorCodes::RECEIVED_EMPTY_DATA);
}
}
-
-void ColumnGathererStream::readSuffixImpl()
+ColumnGathererTransform::ColumnGathererTransform(
+ const Block & header,
+ size_t num_inputs,
+ ReadBuffer & row_sources_buf_,
+ size_t block_preferred_size_)
+ : IMergingTransform(
+ num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
+ num_inputs, row_sources_buf_, block_preferred_size_)
+ , log(&Poco::Logger::get("ColumnGathererStream"))
{
- const BlockStreamProfileInfo & profile_info = getProfileInfo();
+ if (header.columns() != 1)
+ throw Exception(
+ "Header should have 1 column, but contains " + toString(header.columns()),
+ ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
+}
+void ColumnGathererTransform::work()
+{
+ Stopwatch stopwatch;
+ IMergingTransform::work();
+ elapsed_ns += stopwatch.elapsedNanoseconds();
+}
+
+void ColumnGathererTransform::onFinish()
+{
+ auto merged_rows = algorithm.getMergedRows();
+ auto merged_bytes = algorithm.getMergedRows();
/// Don't print info for small parts (< 10M rows)
- if (profile_info.rows < 10000000)
+ if (merged_rows < 10000000)
return;
- double seconds = profile_info.total_stopwatch.elapsedSeconds();
+ double seconds = static_cast(elapsed_ns) / 1000000000ULL;
+ const auto & column_name = getOutputPort().getHeader().getByPosition(0).name;
if (!seconds)
LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.",
- column_name, static_cast(profile_info.bytes) / profile_info.rows);
+ column_name, static_cast(merged_bytes) / merged_rows);
else
LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.",
- column_name, static_cast(profile_info.bytes) / profile_info.rows, seconds,
- profile_info.rows / seconds, ReadableSize(profile_info.bytes / seconds));
+ column_name, static_cast(merged_bytes) / merged_rows, seconds,
+ merged_rows / seconds, ReadableSize(merged_bytes / seconds));
}
}
diff --git a/src/DataStreams/ColumnGathererStream.h b/src/DataStreams/ColumnGathererStream.h
index 05665ab3f42..43cbf7094d8 100644
--- a/src/DataStreams/ColumnGathererStream.h
+++ b/src/DataStreams/ColumnGathererStream.h
@@ -1,8 +1,9 @@
#pragma once
-#include
#include
#include
+#include
+#include
namespace Poco { class Logger; }
@@ -53,77 +54,91 @@ using MergedRowSources = PODArray;
* Stream mask maps row number to index of source stream.
* Streams should contain exactly one column.
*/
-class ColumnGathererStream : public IBlockInputStream
+class ColumnGathererStream final : public IMergingAlgorithm
{
public:
- ColumnGathererStream(
- const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_,
- size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE);
+ ColumnGathererStream(size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE);
- String getName() const override { return "ColumnGatherer"; }
-
- Block readImpl() override;
-
- void readSuffixImpl() override;
-
- Block getHeader() const override { return children.at(0)->getHeader(); }
+ void initialize(Inputs inputs) override;
+ void consume(Input & input, size_t source_num) override;
+ Status merge() override;
/// for use in implementations of IColumn::gather()
template
void gather(Column & column_res);
+ UInt64 getMergedRows() const { return merged_rows; }
+ UInt64 getMergedBytes() const { return merged_bytes; }
+
private:
/// Cache required fields
struct Source
{
- const IColumn * column = nullptr;
+ ColumnPtr column;
size_t pos = 0;
size_t size = 0;
- Block block;
- void update(const String & name)
+ void update(ColumnPtr column_)
{
- column = block.getByName(name).column.get();
- size = block.rows();
+ column = std::move(column_);
+ size = column->size();
pos = 0;
}
};
- void fetchNewBlock(Source & source, size_t source_num);
-
- String column_name;
- ColumnWithTypeAndName column;
+ MutableColumnPtr result_column;
std::vector
-
-
+
From a99a6fccc7289fb75bb55a3b47cfda8d144478b0 Mon Sep 17 00:00:00 2001
From: WangZengrui
Date: Sat, 2 Oct 2021 02:34:53 +0800
Subject: [PATCH 017/104] init
---
src/Interpreters/getOSKernelVersion.cpp | 37 +++++++++++++++++++++++++
src/Interpreters/getOSKernelVersion.h | 31 +++++++++++++++++++++
2 files changed, 68 insertions(+)
create mode 100644 src/Interpreters/getOSKernelVersion.cpp
create mode 100644 src/Interpreters/getOSKernelVersion.h
diff --git a/src/Interpreters/getOSKernelVersion.cpp b/src/Interpreters/getOSKernelVersion.cpp
new file mode 100644
index 00000000000..44df948be3c
--- /dev/null
+++ b/src/Interpreters/getOSKernelVersion.cpp
@@ -0,0 +1,37 @@
+#if defined(OS_LINUX)
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+}
+
+String getOSKernelVersion()
+{
+ struct utsname os_kernel_version;
+ int buf = uname(&os_kernel_version);
+ if (buf < 0)
+ {
+ throw Exception(
+ "EFAULT buf is not valid.",
+ ErrorCodes::BAD_ARGUMENTS);
+ }
+ else
+ {
+ // std::cout <<"sysname: " << os_kernel_version.sysname << " nodename: " << os_kernel_version.nodename
+ // << " release: " << os_kernel_version.release << " version: " << os_kernel_version.version
+ // << " machine: " << os_kernel_version.machine << std::endl;
+
+ return "sysname: " + String(os_kernel_version.sysname) + " nodename: " + String(os_kernel_version.nodename)
+ + " release: " + String(os_kernel_version.release) + " version: " + String(os_kernel_version.version)
+ + " machine: " + String(os_kernel_version.machine);
+ }
+}
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/Interpreters/getOSKernelVersion.h b/src/Interpreters/getOSKernelVersion.h
new file mode 100644
index 00000000000..14b42d2a19a
--- /dev/null
+++ b/src/Interpreters/getOSKernelVersion.h
@@ -0,0 +1,31 @@
+#if defined(OS_LINUX)
+#pragma once
+
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+/// Returns String with OS Kernel version.
+/* Obtains the name of and information about the currently running kernel.
+   For simplicity, this function is implemented only for Linux.
+*/
+
+String getOSKernelVersion();
+
+// String getSysName();
+
+// String getNodeName();
+
+// String getReleaseName();
+
+// String getVersion();
+
+// String getMachineName();
+
+}
+
+#endif
\ No newline at end of file
From 1c62a53afe8344908a7d8b0c6ea3baf55e3aada9 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Mon, 4 Oct 2021 20:49:21 +0300
Subject: [PATCH 018/104] Document the enable_positional_arguments setting
---
docs/en/operations/settings/settings.md | 36 +++++++++++++++++++
.../sql-reference/statements/select/index.md | 2 +-
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index de7a1835038..320dadb6783 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3749,3 +3749,39 @@ Exception: Total regexp lengths too large.
**See Also**
- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length)
+
+## enable_positional_arguments {#enable-positional-arguments}
+
+Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statement. When you want to use column numbers instead of titles in these clauses, set `enable_positional_arguments = 1`.
+
+Possible values:
+
+- 0 — Disabled.
+- 1 — Enabled.
+
+Default value: `0`.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE positional_arguments(one Int, two Int, tree Int) ENGINE=Memory();
+
+INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 30, 10), (30, 10, 20);
+
+SET enable_positional_arguments = 1;
+
+SELECT * FROM positional_arguments ORDER BY 2,3;
+```
+
+Result:
+
+```text
+┌─one─┬─two─┬─tree─┐
+│ 30 │ 10 │ 20 │
+│ 10 │ 20 │ 30 │
+│ 20 │ 30 │ 10 │
+└─────┴─────┴──────┘
+
+```
\ No newline at end of file
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index b3cc7555d91..2bfaf4af297 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -144,7 +144,7 @@ Extreme values are calculated for rows before `LIMIT`, but after `LIMIT BY`. How
You can use synonyms (`AS` aliases) in any part of a query.
-The `GROUP BY` and `ORDER BY` clauses do not support positional arguments. This contradicts MySQL, but conforms to standard SQL. For example, `GROUP BY 1, 2` will be interpreted as grouping by constants (i.e. aggregation of all rows into one).
+The `GROUP BY`, `ORDER BY` and `LIMIT BY` clauses support positional arguments. For example, ORDER BY 1,2 will be sorting rows on the table on the first and then the second column.
## Implementation Details {#implementation-details}
From 83424685fc6af68d66e3eacf6d7cc5f095ff6947 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Mon, 4 Oct 2021 21:11:12 +0300
Subject: [PATCH 019/104] update example
---
docs/en/operations/settings/settings.md | 6 +++---
docs/en/sql-reference/statements/select/index.md | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 320dadb6783..46f72bd4f2b 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3752,7 +3752,7 @@ Exception: Total regexp lengths too large.
## enable_positional_arguments {#enable-positional-arguments}
-Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statement. When you want to use column numbers instead of titles in these clauses, set `enable_positional_arguments = 1`.
+Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statements. When you want to use column numbers instead of titles in these clauses, set `enable_positional_arguments = 1`.
Possible values:
@@ -3768,7 +3768,7 @@ Query:
```sql
CREATE TABLE positional_arguments(one Int, two Int, tree Int) ENGINE=Memory();
-INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 30, 10), (30, 10, 20);
+INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
SET enable_positional_arguments = 1;
@@ -3780,8 +3780,8 @@ Result:
```text
┌─one─┬─two─┬─tree─┐
│ 30 │ 10 │ 20 │
+│ 20 │ 20 │ 10 │
│ 10 │ 20 │ 30 │
-│ 20 │ 30 │ 10 │
└─────┴─────┴──────┘
```
\ No newline at end of file
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index 2bfaf4af297..35bbb7de2cf 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -144,7 +144,7 @@ Extreme values are calculated for rows before `LIMIT`, but after `LIMIT BY`. How
You can use synonyms (`AS` aliases) in any part of a query.
-The `GROUP BY`, `ORDER BY` and `LIMIT BY` clauses support positional arguments. For example, ORDER BY 1,2 will be sorting rows on the table on the first and then the second column.
+The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses support positional arguments. For example, `ORDER BY 1,2` will be sorting rows on the table on the first and then the second column.
## Implementation Details {#implementation-details}
From c049170f7820a57cd8f7c5d7fc037dc6f88b110d Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Mon, 4 Oct 2021 21:26:04 +0300
Subject: [PATCH 020/104] minor fix
---
docs/en/operations/settings/settings.md | 4 ++--
docs/en/sql-reference/statements/select/index.md | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 46f72bd4f2b..e6e912e74db 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3766,7 +3766,7 @@ Default value: `0`.
Query:
```sql
-CREATE TABLE positional_arguments(one Int, two Int, tree Int) ENGINE=Memory();
+CREATE TABLE positional_arguments(one Int, two Int, three Int) ENGINE=Memory();
INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
@@ -3778,7 +3778,7 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
Result:
```text
-┌─one─┬─two─┬─tree─┐
+┌─one─┬─two─┬─three─┐
│ 30 │ 10 │ 20 │
│ 20 │ 20 │ 10 │
│ 10 │ 20 │ 30 │
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index 35bbb7de2cf..e31ba6b1f4b 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -144,7 +144,7 @@ Extreme values are calculated for rows before `LIMIT`, but after `LIMIT BY`. How
You can use synonyms (`AS` aliases) in any part of a query.
-The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses support positional arguments. For example, `ORDER BY 1,2` will be sorting rows on the table on the first and then the second column.
+The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses support positional arguments. For example, `ORDER BY 1,2` will be sorting rows in the table on the first and then the second column.
## Implementation Details {#implementation-details}
From 1afe2dfb40840e34dfd2087e7b23e07191b117e4 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Mon, 4 Oct 2021 21:35:54 +0300
Subject: [PATCH 021/104] fix example
---
docs/en/operations/settings/settings.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e6e912e74db..246f97e47fc 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3779,9 +3779,9 @@ Result:
```text
┌─one─┬─two─┬─three─┐
-│ 30 │ 10 │ 20 │
-│ 20 │ 20 │ 10 │
-│ 10 │ 20 │ 30 │
-└─────┴─────┴──────┘
+│ 30 │ 10 │ 20 │
+│ 20 │ 20 │ 10 │
+│ 10 │ 20 │ 30 │
+└─────┴─────┴─────-─┘
```
\ No newline at end of file
From 2001ebbf9d493b5803b0370efc9f0fa272fbf98f Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 4 Oct 2021 21:52:31 +0300
Subject: [PATCH 022/104] Fix build.
---
src/DataStreams/ColumnGathererStream.h | 2 +-
src/Storages/MergeTree/DataPartsExchange.cpp | 2 --
src/Storages/MergeTree/MergeTask.cpp | 29 +++++++++++--------
src/Storages/MergeTree/MergeTask.h | 5 ++--
.../MergeTree/MergeTreeDataPartInMemory.cpp | 2 --
.../MergeTree/MergeTreeDataWriter.cpp | 2 --
.../MergeTree/MergeTreeWriteAheadLog.cpp | 1 -
src/Storages/MergeTree/MutateTask.cpp | 3 --
src/Storages/StorageReplicatedMergeTree.cpp | 1 -
9 files changed, 21 insertions(+), 26 deletions(-)
diff --git a/src/DataStreams/ColumnGathererStream.h b/src/DataStreams/ColumnGathererStream.h
index 43cbf7094d8..39ba57bce35 100644
--- a/src/DataStreams/ColumnGathererStream.h
+++ b/src/DataStreams/ColumnGathererStream.h
@@ -137,7 +137,7 @@ void ColumnGathererStream::gather(Column & column_res)
column_res.reserve(cur_block_preferred_size);
}
- size_t cur_size = column_res->size();
+ size_t cur_size = column_res.size();
next_required_source = -1;
while (row_source_pos < row_sources_end && cur_size < cur_block_preferred_size)
diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp
index 2683b5a6042..e8ba5d6acd9 100644
--- a/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -589,7 +589,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
block.getNamesAndTypesList(),
{},
CompressionCodecFactory::instance().get("NONE", {}));
- part_out.writePrefix();
part_out.write(block);
part_out.writeSuffixAndFinalizePart(new_projection_part);
new_projection_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true);
@@ -612,7 +611,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
MergedBlockOutputStream part_out(
new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {}));
- part_out.writePrefix();
part_out.write(block);
part_out.writeSuffixAndFinalizePart(new_data_part);
new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true);
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 54230f56a6c..70451413b41 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -350,8 +350,6 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const
global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size();
global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed);
- ctx->column_part_streams = BlockInputStreams(global_ctx->future_part->parts.size());
-
ctx->rows_sources_write_buf->next();
ctx->rows_sources_uncompressed_write_buf->next();
/// Ensure data has written to disk.
@@ -386,6 +384,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name));
+ Pipes pipes;
for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num)
{
auto column_part_source = std::make_shared(
@@ -395,20 +394,22 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
column_part_source->setProgressCallback(
MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->column_progress));
- QueryPipeline column_part_pipeline(Pipe(std::move(column_part_source)));
- column_part_pipeline.setNumThreads(1);
-
- ctx->column_part_streams[part_num] =
- std::make_shared(std::move(column_part_pipeline));
+ pipes.emplace_back(std::move(column_part_source));
}
+ auto pipe = Pipe::unitePipes(std::move(pipes));
+
ctx->rows_sources_read_buf->seek(0, 0);
- ctx->column_gathered_stream = std::make_unique(column_name, ctx->column_part_streams, *ctx->rows_sources_read_buf);
+ auto transform = std::make_unique(pipe.getHeader(), pipe.numOutputPorts(), *ctx->rows_sources_read_buf);
+ pipe.addTransform(std::move(transform));
+
+ ctx->column_parts_pipeline = QueryPipeline(std::move(pipe));
+ ctx->executor = std::make_unique(ctx->column_parts_pipeline);
ctx->column_to = std::make_unique(
global_ctx->new_data_part,
global_ctx->metadata_snapshot,
- ctx->column_gathered_stream->getHeader(),
+ ctx->executor->getHeader(),
ctx->compression_codec,
/// we don't need to recalc indices here
/// because all of them were already recalculated and written
@@ -424,7 +425,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
bool MergeTask::VerticalMergeStage::executeVerticalMergeForOneColumn() const
{
Block block;
- if (!global_ctx->merges_blocker->isCancelled() && (block = ctx->column_gathered_stream->read()))
+ if (!global_ctx->merges_blocker->isCancelled() && ctx->executor->pull(block))
{
ctx->column_elems_written += block.rows();
ctx->column_to->write(block);
@@ -442,7 +443,7 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const
if (global_ctx->merges_blocker->isCancelled())
throw Exception("Cancelled merging parts", ErrorCodes::ABORTED);
- ctx->column_gathered_stream->readSuffix();
+ ctx->executor.reset();
auto changed_checksums = ctx->column_to->writeSuffixAndGetChecksums(global_ctx->new_data_part, global_ctx->checksums_gathered_columns, ctx->need_sync);
global_ctx->checksums_gathered_columns.add(std::move(changed_checksums));
@@ -452,10 +453,14 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const
", but " + toString(global_ctx->rows_written) + " rows of PK columns", ErrorCodes::LOGICAL_ERROR);
}
+ UInt64 rows = 0;
+ UInt64 bytes = 0;
+ ctx->column_parts_pipeline.tryGetResultRowsAndBytes(rows, bytes);
+
/// NOTE: 'progress' is modified by single thread, but it may be concurrently read from MergeListElement::getInfo() (StorageSystemMerges).
global_ctx->merge_list_element_ptr->columns_written += 1;
- global_ctx->merge_list_element_ptr->bytes_written_uncompressed += ctx->column_gathered_stream->getProfileInfo().bytes;
+ global_ctx->merge_list_element_ptr->bytes_written_uncompressed += bytes;
global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed);
/// This is the external cycle increment.
diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h
index 54b0255fd5c..ce628d831ae 100644
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#include
#include
@@ -263,8 +264,8 @@ private:
Float64 progress_before = 0;
std::unique_ptr column_to{nullptr};
size_t column_elems_written{0};
- BlockInputStreams column_part_streams;
- std::unique_ptr column_gathered_stream;
+ QueryPipeline column_parts_pipeline;
+ std::unique_ptr executor;
std::unique_ptr rows_sources_read_buf{nullptr};
};
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
index 635da7e2ede..d43855fa8de 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
@@ -92,7 +92,6 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri
auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices());
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec);
- out.writePrefix();
out.write(block);
const auto & projections = metadata_snapshot->getProjections();
for (const auto & [projection_name, projection] : projection_parts)
@@ -123,7 +122,6 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri
auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices());
MergedBlockOutputStream projection_out(
projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec);
- projection_out.writePrefix();
projection_out.write(projection_part->block);
projection_out.writeSuffixAndFinalizePart(projection_data_part);
new_data_part->addProjectionPart(projection_name, std::move(projection_data_part));
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 95ddf105b79..d939312c0bb 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -412,7 +412,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec);
bool sync_on_insert = data.getSettings()->fsync_after_insert;
- out.writePrefix();
out.writeWithPermutation(block, perm_ptr);
for (const auto & projection : metadata_snapshot->getProjections())
@@ -508,7 +507,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl(
{},
compression_codec);
- out.writePrefix();
out.writeWithPermutation(block, perm_ptr);
out.writeSuffixAndFinalizePart(new_data_part);
diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
index 3e8aa6af536..2c1d785236c 100644
--- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
+++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
@@ -202,7 +202,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
if (metadata_snapshot->hasSortingKey())
metadata_snapshot->getSortingKey().expression->execute(block);
- part_out.writePrefix();
part_out.write(block);
for (const auto & projection : metadata_snapshot->getProjections())
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 0655806bf0e..b93075d0ce6 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -793,7 +793,6 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
if (ctx->minmax_idx)
ctx->minmax_idx->update(block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey()));
- ctx->out->write(block);
for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i)
{
@@ -952,7 +951,6 @@ private:
ctx->compression_codec);
ctx->mutating_stream->readPrefix();
- ctx->out->writePrefix();
part_merger_writer_task = std::make_unique(ctx);
}
@@ -1102,7 +1100,6 @@ private:
);
ctx->mutating_stream->readPrefix();
- ctx->out->writePrefix();
ctx->projections_to_build = std::vector{ctx->projections_to_recalc.begin(), ctx->projections_to_recalc.end()};
part_merger_writer_task = std::make_unique(ctx);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 3b84c4c4a03..eafe81f8c88 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -7157,7 +7157,6 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec);
bool sync_on_insert = settings->fsync_after_insert;
- out.writePrefix();
out.write(block);
/// TODO(ab): What projections should we add to the empty part? How can we make sure that it
/// won't block future merges? Perhaps we should also check part emptiness when selecting parts
From 6febfeea77ad6e28e23a2ccb0cde99e7bbca35b5 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Mon, 4 Oct 2021 21:55:03 +0300
Subject: [PATCH 023/104] fix table
---
docs/en/operations/settings/settings.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 246f97e47fc..22bde8cffa8 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3782,6 +3782,6 @@ Result:
│ 30 │ 10 │ 20 │
│ 20 │ 20 │ 10 │
│ 10 │ 20 │ 30 │
-└─────┴─────┴─────-─┘
+└─────┴─────┴───────┘
```
\ No newline at end of file
From a638c40fdcfd1b8b01153713e070664c1c38976f Mon Sep 17 00:00:00 2001
From: WangZengrui
Date: Tue, 5 Oct 2021 09:08:16 +0800
Subject: [PATCH 024/104] add getOSKernelVersion
---
src/Interpreters/getOSKernelVersion.cpp | 18 ++++++------------
src/Interpreters/getOSKernelVersion.h | 12 +-----------
2 files changed, 7 insertions(+), 23 deletions(-)
diff --git a/src/Interpreters/getOSKernelVersion.cpp b/src/Interpreters/getOSKernelVersion.cpp
index 44df948be3c..c4b4564f46e 100644
--- a/src/Interpreters/getOSKernelVersion.cpp
+++ b/src/Interpreters/getOSKernelVersion.cpp
@@ -7,28 +7,22 @@ namespace DB
namespace ErrorCodes
{
- extern const int BAD_ARGUMENTS;
+ extern const int SYSTEM_ERROR;
}
String getOSKernelVersion()
{
- struct utsname os_kernel_version;
- int buf = uname(&os_kernel_version);
+ struct utsname os_kernel_info;
+ int buf = uname(&os_kernel_info);
if (buf < 0)
{
throw Exception(
- "EFAULT buf is not valid.",
- ErrorCodes::BAD_ARGUMENTS);
+ "EFAULT buffer is not valid.",
+ ErrorCodes::SYSTEM_ERROR);
}
else
{
- // std::cout <<"sysname: " << os_kernel_version.sysname << " nodename: " << os_kernel_version.nodename
- // << " release: " << os_kernel_version.release << " version: " << os_kernel_version.version
- // << " machine: " << os_kernel_version.machine << std::endl;
-
- return "sysname: " + String(os_kernel_version.sysname) + " nodename: " + String(os_kernel_version.nodename)
- + " release: " + String(os_kernel_version.release) + " version: " + String(os_kernel_version.version)
- + " machine: " + String(os_kernel_version.machine);
+ return String(os_kernel_info.sysname) + " " + String(os_kernel_info.release);
}
}
diff --git a/src/Interpreters/getOSKernelVersion.h b/src/Interpreters/getOSKernelVersion.h
index 14b42d2a19a..fc3c7583aef 100644
--- a/src/Interpreters/getOSKernelVersion.h
+++ b/src/Interpreters/getOSKernelVersion.h
@@ -1,5 +1,5 @@
-#if defined(OS_LINUX)
#pragma once
+#if defined(OS_LINUX)
#include
@@ -16,16 +16,6 @@ namespace DB
String getOSKernelVersion();
-// String getSysName();
-
-// String getNodeName();
-
-// String getReleaseName();
-
-// String getVersion();
-
-// String getMachineName();
-
}
#endif
\ No newline at end of file
From c106f1e38000dd7b8847ad99c6dac231af3e20b8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 5 Oct 2021 12:51:43 +0300
Subject: [PATCH 025/104] Fix tests.
---
src/DataStreams/ColumnGathererStream.cpp | 2 +-
src/Processors/Chunk.cpp | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/DataStreams/ColumnGathererStream.cpp b/src/DataStreams/ColumnGathererStream.cpp
index 90da7792c21..add473190c0 100644
--- a/src/DataStreams/ColumnGathererStream.cpp
+++ b/src/DataStreams/ColumnGathererStream.cpp
@@ -105,7 +105,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
merged_rows += col->size();
merged_bytes += col->allocatedBytes();
res.addColumn(std::move(col));
- return Status(std::move(res), row_sources_buf.eof());
+ return Status(std::move(res), row_sources_buf.eof() && !source_to_fully_copy);
}
diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp
index 4800bfca2ce..d904f3755bc 100644
--- a/src/Processors/Chunk.cpp
+++ b/src/Processors/Chunk.cpp
@@ -104,7 +104,9 @@ Columns Chunk::detachColumns()
void Chunk::addColumn(ColumnPtr column)
{
- if (column->size() != num_rows)
+ if (empty())
+ num_rows = column->size();
+ else if (column->size() != num_rows)
throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " +
toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
From 82e6ac8fa2cdba5ef016bbe4278d7c17888dafb7 Mon Sep 17 00:00:00 2001
From: Filatenkov Artur <58165623+FArthur-cmd@users.noreply.github.com>
Date: Tue, 5 Oct 2021 13:39:18 +0300
Subject: [PATCH 026/104] Update HTTPHandler.cpp
---
src/Server/HTTPHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 413cfe18696..1036d5031f7 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -24,7 +24,7 @@
#include
#include
#include
-#include "common/logger_useful.h"
+#include
#include
#include
#include
From 96d070a5baf2847ea35a9587944f73772295afa2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 5 Oct 2021 16:58:24 +0300
Subject: [PATCH 027/104] Fix tests.
---
src/Storages/MergeTree/IMergedBlockOutputStream.h | 4 ++++
src/Storages/MergeTree/MergedBlockOutputStream.h | 2 +-
src/Storages/MergeTree/MergedColumnOnlyOutputStream.h | 2 +-
src/Storages/MergeTree/MutateTask.cpp | 1 +
4 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h
index 133f0804838..36fbe76cca2 100644
--- a/src/Storages/MergeTree/IMergedBlockOutputStream.h
+++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h
@@ -15,8 +15,12 @@ public:
const MergeTreeDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_);
+ virtual ~IMergedBlockOutputStream() = default;
+
using WrittenOffsetColumns = std::set;
+ virtual void write(const Block & block) = 0;
+
const MergeTreeIndexGranularity & getIndexGranularity() const
{
return writer->getIndexGranularity();
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h
index 3586ac17298..5965331ee81 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.h
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.h
@@ -24,7 +24,7 @@ public:
Block getHeader() const { return metadata_snapshot->getSampleBlock(); }
/// If the data is pre-sorted.
- void write(const Block & block);
+ void write(const Block & block) override;
/** If the data is not sorted, but we have previously calculated the permutation, that will sort it.
* This method is used to save RAM, since you do not need to keep two blocks at once - the original one and the sorted one.
diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
index 7a146a91331..4b75bc52f72 100644
--- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
+++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
@@ -24,7 +24,7 @@ public:
const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr);
Block getHeader() const { return header; }
- void write(const Block & block);
+ void write(const Block & block) override;
MergeTreeData::DataPart::Checksums
writeSuffixAndGetChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums, bool sync = false);
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index b93075d0ce6..1900b694a8d 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -793,6 +793,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
if (ctx->minmax_idx)
ctx->minmax_idx->update(block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey()));
+ ctx->out->write(block);
for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i)
{
From 9b1a39fdb9c4b79d2f045f88d97a1ce33c7d4797 Mon Sep 17 00:00:00 2001
From: Artur Filatenkov <613623@mail.ru>
Date: Tue, 5 Oct 2021 17:43:33 +0300
Subject: [PATCH 028/104] refactor after move common to base
---
src/Server/HTTPHandler.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 1036d5031f7..9edef8a7223 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -24,15 +24,15 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
#include
#include
-#include
-#include
+#include
+#include
#include
#if !defined(ARCADIA_BUILD)
From aeb670dbe00a9328ed361b84e277f3138fa2ab64 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 5 Oct 2021 21:02:00 +0300
Subject: [PATCH 029/104] Simplify obtaining of server pid in fuzzer tests
---
docker/test/fuzzer/run-fuzzer.sh | 22 +++-------------------
1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 3c3fcd42fde..15590902b68 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -125,25 +125,9 @@ function fuzz
# interferes with gdb
export CLICKHOUSE_WATCHDOG_ENABLE=0
- # NOTE: that $! cannot be used to obtain the server pid, since it will be
- # the pid of the bash, due to piping the output of clickhouse-server to
- # tail
- PID_FILE=clickhouse-server.pid
- clickhouse-server --pidfile=$PID_FILE --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
-
- server_pid=-1
- for _ in {1..60}; do
- if [ -s $PID_FILE ]; then
- server_pid=$(cat $PID_FILE)
- break
- fi
- sleep 1
- done
-
- if [ $server_pid = -1 ]; then
- echo "Server did not started" >&2
- exit 1
- fi
+ # NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server
+ clickhouse-server --config-file db/config.xml -- --path db > >(tail -100000 > server.log) 2>&1 &
+ server_pid=$!
kill -0 $server_pid
From bd4b0af2e14dfa22257d4778bf135f65dee1723c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 6 Oct 2021 02:34:48 +0300
Subject: [PATCH 030/104] Fix bad cast in ParserCreateQuery
---
src/Parsers/ParserCreateQuery.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index d4525883e36..2ea1663fc80 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -481,7 +481,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
if (attach && s_from.ignore(pos, expected))
{
- ParserLiteral from_path_p;
+ ParserStringLiteral from_path_p;
if (!from_path_p.parse(pos, from_path, expected))
return false;
}
From a518f5ea11667e56e48b065354dd3f81dae945e5 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Wed, 6 Oct 2021 12:26:22 +0300
Subject: [PATCH 031/104] Update docs/en/operations/settings/settings.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/operations/settings/settings.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 22bde8cffa8..fa4b96b8fbc 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3752,7 +3752,7 @@ Exception: Total regexp lengths too large.
## enable_positional_arguments {#enable-positional-arguments}
-Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statements. When you want to use column numbers instead of titles in these clauses, set `enable_positional_arguments = 1`.
+Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statements. When you want to use column numbers instead of column names in these clauses, set `enable_positional_arguments = 1`.
Possible values:
From 6b619512aa1c1feef9b8bde48fa91b279e2266e6 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 6 Oct 2021 12:37:56 +0300
Subject: [PATCH 032/104] Fix style.
---
src/DataStreams/ColumnGathererStream.cpp | 1 -
src/DataStreams/ColumnGathererStream.h | 2 +-
src/Storages/MergeTree/MergedBlockOutputStream.cpp | 1 -
3 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/DataStreams/ColumnGathererStream.cpp b/src/DataStreams/ColumnGathererStream.cpp
index 319580cf8f3..9b2fac79bb0 100644
--- a/src/DataStreams/ColumnGathererStream.cpp
+++ b/src/DataStreams/ColumnGathererStream.cpp
@@ -11,7 +11,6 @@ namespace DB
namespace ErrorCodes
{
- extern const int INCOMPATIBLE_COLUMNS;
extern const int INCORRECT_NUMBER_OF_COLUMNS;
extern const int EMPTY_DATA_PASSED;
extern const int RECEIVED_EMPTY_DATA;
diff --git a/src/DataStreams/ColumnGathererStream.h b/src/DataStreams/ColumnGathererStream.h
index 39ba57bce35..2d013e596ce 100644
--- a/src/DataStreams/ColumnGathererStream.h
+++ b/src/DataStreams/ColumnGathererStream.h
@@ -96,7 +96,7 @@ private:
Source * source_to_fully_copy = nullptr;
ssize_t next_required_source = -1;
- size_t cur_block_preferred_size;
+ size_t cur_block_preferred_size = 0;
UInt64 merged_rows = 0;
UInt64 merged_bytes = 0;
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index 495ce5ee933..5206f77290b 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -8,7 +8,6 @@ namespace DB
namespace ErrorCodes
{
- extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
From c0ee6d46f7dafda4784c4c2db4837015ea934005 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 6 Oct 2021 15:39:14 +0300
Subject: [PATCH 033/104] resolve dependency of StorageDictionary
---
src/Databases/DDLDependencyVisitor.cpp | 12 ++++++++++++
src/Databases/DDLDependencyVisitor.h | 2 ++
.../integration/helpers/0_common_instance_config.xml | 2 +-
.../integration/test_dictionaries_dependency/test.py | 2 ++
4 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index 73800e84256..98f697ef641 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -16,6 +16,8 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
visit(*function, data);
else if (const auto * dict_source = ast->as())
visit(*dict_source, data);
+ else if (const auto * storage = ast->as())
+ visit(*storage, data);
}
bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child)
@@ -66,6 +68,16 @@ void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_s
data.dependencies.emplace(std::move(info->table_name));
}
+void DDLDependencyVisitor::visit(const ASTStorage & storage, Data & data)
+{
+ if (!storage.engine)
+ return;
+ if (storage.engine->name != "Dictionary")
+ return;
+
+ extractTableNameFromArgument(*storage.engine, data, 0);
+}
+
void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx)
{
diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h
index c0b39d70b08..5779aee7d33 100644
--- a/src/Databases/DDLDependencyVisitor.h
+++ b/src/Databases/DDLDependencyVisitor.h
@@ -8,6 +8,7 @@ namespace DB
class ASTFunction;
class ASTFunctionWithKeyValueArguments;
+class ASTStorage;
/// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies
/// from column default expressions (joinGet, dictGet, etc)
@@ -33,6 +34,7 @@ public:
private:
static void visit(const ASTFunction & function, Data & data);
static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data);
+ static void visit(const ASTStorage & storage, Data & data);
static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx);
};
diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml
index 717f6db7e4b..c848ebdf45c 100644
--- a/tests/integration/helpers/0_common_instance_config.xml
+++ b/tests/integration/helpers/0_common_instance_config.xml
@@ -9,7 +9,7 @@
users.xml
- trace
+ test
/var/log/clickhouse-server/clickhouse-server.log
/var/log/clickhouse-server/clickhouse-server.err.log
1000M
diff --git a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py
index d615f90dc79..ceab39aacb8 100644
--- a/tests/integration/test_dictionaries_dependency/test.py
+++ b/tests/integration/test_dictionaries_dependency/test.py
@@ -36,6 +36,8 @@ def cleanup_after_test():
yield
finally:
for node in nodes:
+ for i in range(4):
+ node.query("DROP DICTIONARY IF EXISTS test.other_{}".format(i))
node.query("DROP DICTIONARY IF EXISTS test.adict")
node.query("DROP DICTIONARY IF EXISTS test.zdict")
node.query("DROP DICTIONARY IF EXISTS atest.dict")
From d0c6f11fcb203b50e32fbab6b9488c6ffa87fcde Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 6 Oct 2021 20:59:27 +0300
Subject: [PATCH 034/104] More.
---
.../DistinctSortedBlockInputStream.cpp | 78 +++++++++---------
.../DistinctSortedBlockInputStream.h | 22 +++--
src/DataStreams/TTLBlockInputStream.cpp | 40 ++++++---
src/DataStreams/TTLBlockInputStream.h | 16 ++--
src/DataStreams/TTLCalcInputStream.cpp | 34 +++++---
src/DataStreams/TTLCalcInputStream.h | 15 ++--
src/Interpreters/MutationsInterpreter.cpp | 7 +-
src/Interpreters/MutationsInterpreter.h | 2 +-
src/Storages/MergeTree/MergeTask.cpp | 37 +++++----
src/Storages/MergeTree/MergeTask.h | 3 +-
src/Storages/MergeTree/MutateTask.cpp | 82 ++++++++++++-------
src/Storages/StorageJoin.cpp | 10 +--
src/Storages/StorageMemory.cpp | 9 +-
13 files changed, 197 insertions(+), 158 deletions(-)
diff --git a/src/DataStreams/DistinctSortedBlockInputStream.cpp b/src/DataStreams/DistinctSortedBlockInputStream.cpp
index eab706924c1..a3105d6330c 100644
--- a/src/DataStreams/DistinctSortedBlockInputStream.cpp
+++ b/src/DataStreams/DistinctSortedBlockInputStream.cpp
@@ -8,40 +8,28 @@ namespace ErrorCodes
extern const int SET_SIZE_LIMIT_EXCEEDED;
}
-DistinctSortedBlockInputStream::DistinctSortedBlockInputStream(
- const BlockInputStreamPtr & input, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns)
- : description(std::move(sort_description))
+DistinctSortedTransform::DistinctSortedTransform(
+ const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns)
+ : ISimpleTransform(header, header, true)
+ , description(std::move(sort_description))
, columns_names(columns)
, limit_hint(limit_hint_)
, set_size_limits(set_size_limits_)
{
- children.push_back(input);
}
-Block DistinctSortedBlockInputStream::readImpl()
+void DistinctSortedTransform::transform(Chunk & chunk)
{
- /// Execute until end of stream or until
- /// a block with some new records will be gotten.
- for (;;)
- {
- /// Stop reading if we already reached the limit.
- if (limit_hint && data.getTotalRowCount() >= limit_hint)
- return Block();
-
- Block block = children.back()->read();
- if (!block)
- return Block();
-
- const ColumnRawPtrs column_ptrs(getKeyColumns(block));
+ const ColumnRawPtrs column_ptrs(getKeyColumns(chunk));
if (column_ptrs.empty())
- return block;
+ return;
- const ColumnRawPtrs clearing_hint_columns(getClearingColumns(block, column_ptrs));
+ const ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs));
if (data.type == ClearableSetVariants::Type::EMPTY)
data.init(ClearableSetVariants::chooseMethod(column_ptrs, key_sizes));
- const size_t rows = block.rows();
+ const size_t rows = chunk.getNumRows();
IColumn::Filter filter(rows);
bool has_new_data = false;
@@ -59,25 +47,33 @@ Block DistinctSortedBlockInputStream::readImpl()
/// Just go to the next block if there isn't any new record in the current one.
if (!has_new_data)
- continue;
+ return;
if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED))
- return {};
+ {
+ stopReading();
+ chunk.clear();
+ return;
+ }
- prev_block.block = block;
- prev_block.clearing_hint_columns = std::move(clearing_hint_columns);
+ /// Stop reading if we already reached the limit.
+ if (limit_hint && data.getTotalRowCount() >= limit_hint)
+ stopReading();
- size_t all_columns = block.columns();
+ prev_chunk.chunk = std::move(chunk);
+ prev_chunk.clearing_hint_columns = std::move(clearing_hint_columns);
+
+ size_t all_columns = prev_chunk.chunk.getNumColumns();
+ Chunk res_chunk;
for (size_t i = 0; i < all_columns; ++i)
- block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(filter, -1);
+ res_chunk.addColumn(prev_chunk.chunk.getColumns().at(i)->filter(filter, -1));
- return block;
- }
+ chunk = std::move(res_chunk);
}
template
-bool DistinctSortedBlockInputStream::buildFilter(
+bool DistinctSortedTransform::buildFilter(
Method & method,
const ColumnRawPtrs & columns,
const ColumnRawPtrs & clearing_hint_columns,
@@ -90,8 +86,8 @@ bool DistinctSortedBlockInputStream::buildFilter(
/// Compare last row of previous block and first row of current block,
/// If rows not equal, we can clear HashSet,
/// If clearing_hint_columns is empty, we CAN'T clear HashSet.
- if (!clearing_hint_columns.empty() && !prev_block.clearing_hint_columns.empty()
- && !rowsEqual(clearing_hint_columns, 0, prev_block.clearing_hint_columns, prev_block.block.rows() - 1))
+ if (!clearing_hint_columns.empty() && !prev_chunk.clearing_hint_columns.empty()
+ && !rowsEqual(clearing_hint_columns, 0, prev_chunk.clearing_hint_columns, prev_chunk.chunk.getNumRows() - 1))
{
method.data.clear();
}
@@ -117,18 +113,20 @@ bool DistinctSortedBlockInputStream::buildFilter(
return has_new_data;
}
-ColumnRawPtrs DistinctSortedBlockInputStream::getKeyColumns(const Block & block) const
+ColumnRawPtrs DistinctSortedTransform::getKeyColumns(const Chunk & chunk) const
{
- size_t columns = columns_names.empty() ? block.columns() : columns_names.size();
+ size_t columns = columns_names.empty() ? chunk.getNumColumns() : columns_names.size();
ColumnRawPtrs column_ptrs;
column_ptrs.reserve(columns);
for (size_t i = 0; i < columns; ++i)
{
- const auto & column = columns_names.empty()
- ? block.safeGetByPosition(i).column
- : block.getByName(columns_names[i]).column;
+ auto pos = i;
+ if (!columns_names.empty())
+ pos = input.getHeader().getPositionByName(columns_names[i]);
+
+ const auto & column = chunk.getColumns()[pos];
/// Ignore all constant columns.
if (!isColumnConst(*column))
@@ -138,13 +136,13 @@ ColumnRawPtrs DistinctSortedBlockInputStream::getKeyColumns(const Block & block)
return column_ptrs;
}
-ColumnRawPtrs DistinctSortedBlockInputStream::getClearingColumns(const Block & block, const ColumnRawPtrs & key_columns) const
+ColumnRawPtrs DistinctSortedTransform::getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const
{
ColumnRawPtrs clearing_hint_columns;
clearing_hint_columns.reserve(description.size());
for (const auto & sort_column_description : description)
{
- const auto * sort_column_ptr = block.safeGetByPosition(sort_column_description.column_number).column.get();
+ const auto * sort_column_ptr = chunk.getColumns().at(sort_column_description.column_number).get();
const auto it = std::find(key_columns.cbegin(), key_columns.cend(), sort_column_ptr);
if (it != key_columns.cend()) /// if found in key_columns
clearing_hint_columns.emplace_back(sort_column_ptr);
@@ -154,7 +152,7 @@ ColumnRawPtrs DistinctSortedBlockInputStream::getClearingColumns(const Block & b
return clearing_hint_columns;
}
-bool DistinctSortedBlockInputStream::rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m)
+bool DistinctSortedTransform::rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m)
{
for (size_t column_index = 0, num_columns = lhs.size(); column_index < num_columns; ++column_index)
{
diff --git a/src/DataStreams/DistinctSortedBlockInputStream.h b/src/DataStreams/DistinctSortedBlockInputStream.h
index 146c9326e5d..ddac6c18a64 100644
--- a/src/DataStreams/DistinctSortedBlockInputStream.h
+++ b/src/DataStreams/DistinctSortedBlockInputStream.h
@@ -1,6 +1,6 @@
#pragma once
-#include
+#include
#include
#include
@@ -18,24 +18,22 @@ namespace DB
* set limit_hint to non zero value. So we stop emitting new rows after
* count of already emitted rows will reach the limit_hint.
*/
-class DistinctSortedBlockInputStream : public IBlockInputStream
+class DistinctSortedTransform : public ISimpleTransform
{
public:
/// Empty columns_ means all columns.
- DistinctSortedBlockInputStream(const BlockInputStreamPtr & input, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns);
+ DistinctSortedTransform(const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns);
- String getName() const override { return "DistinctSorted"; }
-
- Block getHeader() const override { return children.at(0)->getHeader(); }
+ String getName() const override { return "DistinctSortedTransform"; }
protected:
- Block readImpl() override;
+ void transform(Chunk & chunk) override;
private:
- ColumnRawPtrs getKeyColumns(const Block & block) const;
+ ColumnRawPtrs getKeyColumns(const Chunk & chunk) const;
/// When clearing_columns changed, we can clean HashSet to memory optimization
/// clearing_columns is a left-prefix of SortDescription exists in key_columns
- ColumnRawPtrs getClearingColumns(const Block & block, const ColumnRawPtrs & key_columns) const;
+ ColumnRawPtrs getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const;
static bool rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m);
/// return true if has new data
@@ -50,12 +48,12 @@ private:
SortDescription description;
- struct PreviousBlock
+ struct PreviousChunk
{
- Block block;
+ Chunk chunk;
ColumnRawPtrs clearing_hint_columns;
};
- PreviousBlock prev_block;
+ PreviousChunk prev_chunk;
Names columns_names;
ClearableSetVariants data;
diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 05d4ba0a395..1a1484fc08e 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -16,18 +16,17 @@
namespace DB
{
-TTLBlockInputStream::TTLBlockInputStream(
- const BlockInputStreamPtr & input_,
+TTLTransform::TTLTransform(
+ const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_,
bool force_)
- : data_part(data_part_)
- , log(&Poco::Logger::get(storage_.getLogName() + " (TTLBlockInputStream)"))
+ : ISimpleTransform(header_, header_, false)
+ , data_part(data_part_)
+ , log(&Poco::Logger::get(storage_.getLogName() + " (TTLTransform)"))
{
- children.push_back(input_);
- header = children.at(0)->getHeader();
auto old_ttl_infos = data_part->ttl_infos;
if (metadata_snapshot_->hasRowsTTL())
@@ -50,7 +49,7 @@ TTLBlockInputStream::TTLBlockInputStream(
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
algorithms.emplace_back(std::make_unique(
- group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_));
+ group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, getInputPort().getHeader(), storage_));
if (metadata_snapshot_->hasAnyColumnTTL())
{
@@ -98,22 +97,28 @@ Block reorderColumns(Block block, const Block & header)
return res;
}
-Block TTLBlockInputStream::readImpl()
+void TTLTransform::transform(Chunk & chunk)
{
if (all_data_dropped)
- return {};
+ {
+ stopReading();
+ chunk.clear();
+ return;
+ }
- auto block = children.at(0)->read();
+ auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
for (const auto & algorithm : algorithms)
algorithm->execute(block);
if (!block)
- return block;
+ return;
- return reorderColumns(std::move(block), header);
+ size_t num_rows = block.rows();
+
+ chunk = Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows);
}
-void TTLBlockInputStream::readSuffixImpl()
+void TTLTransform::finalize()
{
data_part->ttl_infos = {};
for (const auto & algorithm : algorithms)
@@ -126,4 +131,13 @@ void TTLBlockInputStream::readSuffixImpl()
}
}
+IProcessor::Status TTLTransform::prepare()
+{
+ auto status = ISimpleTransform::prepare();
+ if (status == Status::Finished)
+ finalize();
+
+ return status;
+}
+
}
diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h
index bf854d9cc9c..986181df652 100644
--- a/src/DataStreams/TTLBlockInputStream.h
+++ b/src/DataStreams/TTLBlockInputStream.h
@@ -1,5 +1,5 @@
#pragma once
-#include
+#include
#include
#include
#include
@@ -12,11 +12,11 @@
namespace DB
{
-class TTLBlockInputStream : public IBlockInputStream
+class TTLTransform : public ISimpleTransform
{
public:
- TTLBlockInputStream(
- const BlockInputStreamPtr & input_,
+ TTLTransform(
+ const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
@@ -25,13 +25,14 @@ public:
);
String getName() const override { return "TTL"; }
- Block getHeader() const override { return header; }
+
+ Status prepare() override;
protected:
- Block readImpl() override;
+ void transform(Chunk & chunk) override;
/// Finalizes ttl infos and updates data part
- void readSuffixImpl() override;
+ void finalize();
private:
std::vector algorithms;
@@ -41,7 +42,6 @@ private:
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;
Poco::Logger * log;
- Block header;
};
}
diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/DataStreams/TTLCalcInputStream.cpp
index 2353e9ec259..c156b31428a 100644
--- a/src/DataStreams/TTLCalcInputStream.cpp
+++ b/src/DataStreams/TTLCalcInputStream.cpp
@@ -4,18 +4,17 @@
namespace DB
{
-TTLCalcInputStream::TTLCalcInputStream(
- const BlockInputStreamPtr & input_,
+TTLCalcTransform::TTLCalcTransform(
+ const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_,
bool force_)
- : data_part(data_part_)
+ : ISimpleTransform(header_, header_, true)
+ , data_part(data_part_)
, log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcInputStream)"))
{
- children.push_back(input_);
- header = children.at(0)->getHeader();
auto old_ttl_infos = data_part->ttl_infos;
if (metadata_snapshot_->hasRowsTTL())
@@ -51,27 +50,36 @@ TTLCalcInputStream::TTLCalcInputStream(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
-Block TTLCalcInputStream::readImpl()
+void TTLCalcTransform::transform(Chunk & chunk)
{
- auto block = children.at(0)->read();
+ auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
for (const auto & algorithm : algorithms)
algorithm->execute(block);
if (!block)
- return block;
+ return;
- Block res;
- for (const auto & col : header)
- res.insert(block.getByName(col.name));
+ Chunk res;
+ for (const auto & col : getOutputPort().getHeader())
+ res.addColumn(block.getByName(col.name).column);
- return res;
+ chunk = std::move(res);
}
-void TTLCalcInputStream::readSuffixImpl()
+void TTLCalcTransform::finalize()
{
data_part->ttl_infos = {};
for (const auto & algorithm : algorithms)
algorithm->finalize(data_part);
}
+IProcessor::Status TTLCalcTransform::prepare()
+{
+ auto status = ISimpleTransform::prepare();
+ if (status == Status::Finished)
+ finalize();
+
+ return status;
+}
+
}
diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/DataStreams/TTLCalcInputStream.h
index 20148eadfc2..d0e7b0055f2 100644
--- a/src/DataStreams/TTLCalcInputStream.h
+++ b/src/DataStreams/TTLCalcInputStream.h
@@ -1,5 +1,5 @@
#pragma once
-#include
+#include
#include
#include
#include
@@ -11,11 +11,11 @@
namespace DB
{
-class TTLCalcInputStream : public IBlockInputStream
+class TTLCalcTransform : public ISimpleTransform
{
public:
- TTLCalcInputStream(
- const BlockInputStreamPtr & input_,
+ TTLCalcTransform(
+ const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
@@ -24,13 +24,13 @@ public:
);
String getName() const override { return "TTL_CALC"; }
- Block getHeader() const override { return header; }
+ Status prepare() override;
protected:
- Block readImpl() override;
+ void transform(Chunk & chunk) override;
/// Finalizes ttl infos and updates data part
- void readSuffixImpl() override;
+ void finalize();
private:
std::vector algorithms;
@@ -38,7 +38,6 @@ private:
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;
Poco::Logger * log;
- Block header;
};
}
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index 540d5c76c97..e5a129cbe12 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -932,7 +932,7 @@ void MutationsInterpreter::validate()
auto pipeline = addStreamsForLaterStages(stages, plan);
}
-BlockInputStreamPtr MutationsInterpreter::execute()
+QueryPipeline MutationsInterpreter::execute()
{
if (!can_execute)
throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR);
@@ -956,12 +956,11 @@ BlockInputStreamPtr MutationsInterpreter::execute()
}
auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
- BlockInputStreamPtr result_stream = std::make_shared(std::move(pipeline));
if (!updated_header)
- updated_header = std::make_unique(result_stream->getHeader());
+ updated_header = std::make_unique(pipeline.getHeader());
- return result_stream;
+ return pipeline;
}
Block MutationsInterpreter::getUpdatedHeader() const
diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h
index b0540f7d2ed..7b0ccb3bae5 100644
--- a/src/Interpreters/MutationsInterpreter.h
+++ b/src/Interpreters/MutationsInterpreter.h
@@ -50,7 +50,7 @@ public:
size_t evaluateCommandsSize();
/// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices.
- BlockInputStreamPtr execute();
+ QueryPipeline execute();
/// Only changed columns.
Block getUpdatedHeader() const;
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index e5fcaae3417..0810d45a805 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -11,6 +11,7 @@
#include "Storages/MergeTree/MergeTreeSequentialSource.h"
#include "Storages/MergeTree/FutureMergedMutatedPart.h"
#include "Processors/Transforms/ExpressionTransform.h"
+#include "Processors/Transforms/MaterializingTransform.h"
#include "Processors/Merges/MergingSortedTransform.h"
#include "Processors/Merges/CollapsingSortedTransform.h"
#include "Processors/Merges/SummingSortedTransform.h"
@@ -236,8 +237,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
ctx->compression_codec,
ctx->blocks_are_granules_size);
- global_ctx->merged_stream->readPrefix();
-
global_ctx->rows_written = 0;
ctx->initial_reservation = global_ctx->space_reservation ? global_ctx->space_reservation->getSize() : 0;
@@ -298,14 +297,17 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute()
bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()
{
Block block;
- if (!ctx->is_cancelled() && (block = global_ctx->merged_stream->read()))
+ if (!ctx->is_cancelled() && (global_ctx->merging_executor->pull(block)))
{
global_ctx->rows_written += block.rows();
const_cast(*global_ctx->to).write(block);
- global_ctx->merge_list_element_ptr->rows_written = global_ctx->merged_stream->getProfileInfo().rows;
- global_ctx->merge_list_element_ptr->bytes_written_uncompressed = global_ctx->merged_stream->getProfileInfo().bytes;
+ UInt64 result_rows = 0;
+ UInt64 result_bytes = 0;
+ global_ctx->merged_pipeline.tryGetResultRowsAndBytes(result_rows, result_bytes);
+ global_ctx->merge_list_element_ptr->rows_written = result_rows;
+ global_ctx->merge_list_element_ptr->bytes_written_uncompressed = result_bytes;
/// Reservation updates is not performed yet, during the merge it may lead to higher free space requirements
if (global_ctx->space_reservation && ctx->sum_input_rows_upper_bound)
@@ -323,8 +325,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()
return true;
}
- global_ctx->merged_stream->readSuffix();
- global_ctx->merged_stream.reset();
+ global_ctx->merging_executor.reset();
+ global_ctx->merged_pipeline.reset();
if (global_ctx->merges_blocker->isCancelled())
throw Exception("Cancelled merging parts", ErrorCodes::ABORTED);
@@ -799,26 +801,25 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
auto res_pipe = Pipe::unitePipes(std::move(pipes));
res_pipe.addTransform(std::move(merged_transform));
- QueryPipeline pipeline(std::move(res_pipe));
- pipeline.setNumThreads(1);
-
- global_ctx->merged_stream = std::make_shared(std::move(pipeline));
if (global_ctx->deduplicate)
- global_ctx->merged_stream = std::make_shared(
- global_ctx->merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns);
+ res_pipe.addTransform(std::make_shared(
+ res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
if (ctx->need_remove_expired_values)
- global_ctx->merged_stream = std::make_shared(
- global_ctx->merged_stream, *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl);
+ res_pipe.addTransform(std::make_shared(
+ res_pipe.getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl));
if (global_ctx->metadata_snapshot->hasSecondaryIndices())
{
const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices();
- global_ctx->merged_stream = std::make_shared(
- global_ctx->merged_stream, indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()));
- global_ctx->merged_stream = std::make_shared(global_ctx->merged_stream);
+ res_pipe.addTransform(std::make_shared(
+ res_pipe.getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
+ res_pipe.addTransform(std::make_shared(res_pipe.getHeader()));
}
+
+ global_ctx->merged_pipeline = QueryPipeline(std::move(res_pipe));
+ global_ctx->merging_executor = std::make_unique(global_ctx->merged_pipeline);
}
diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h
index ce628d831ae..aceca912cea 100644
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@@ -148,7 +148,8 @@ private:
std::unique_ptr column_progress{nullptr};
std::shared_ptr to{nullptr};
- BlockInputStreamPtr merged_stream{nullptr};
+ QueryPipeline merged_pipeline;
+ std::unique_ptr merging_executor;
SyncGuardPtr sync_guard{nullptr};
MergeTreeData::MutableDataPartPtr new_data_part{nullptr};
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 746f0c879d3..e38342e21dd 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -11,6 +11,9 @@
#include
#include
#include
+#include
+#include
+#include
#include
#include
#include
@@ -182,7 +185,7 @@ static std::vector getProjectionsForNewDataPart(
/// Return set of indices which should be recalculated during mutation also
/// wraps input stream into additional expression stream
static std::set getIndicesToRecalculate(
- BlockInputStreamPtr & input_stream,
+ QueryPipeline & pipeline,
const NameSet & updated_columns,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr context,
@@ -234,9 +237,9 @@ static std::set getIndicesToRecalculate(
}
}
- if (!indices_to_recalc.empty() && input_stream)
+ if (!indices_to_recalc.empty() && pipeline.initialized())
{
- auto indices_recalc_syntax = TreeRewriter(context).analyze(indices_recalc_expr_list, input_stream->getHeader().getNamesAndTypesList());
+ auto indices_recalc_syntax = TreeRewriter(context).analyze(indices_recalc_expr_list, pipeline.getHeader().getNamesAndTypesList());
auto indices_recalc_expr = ExpressionAnalyzer(
indices_recalc_expr_list,
indices_recalc_syntax, context).getActions(false);
@@ -246,8 +249,11 @@ static std::set getIndicesToRecalculate(
/// MutationsInterpreter which knows about skip indices and stream 'in' already has
/// all required columns.
/// TODO move this logic to single place.
- input_stream = std::make_shared(
- std::make_shared(input_stream, indices_recalc_expr));
+ QueryPipelineBuilder builder;
+ builder.init(std::move(pipeline));
+ builder.addTransform(std::make_shared(builder.getHeader(), indices_recalc_expr));
+ builder.addTransform(std::make_shared(builder.getHeader()));
+ pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
}
return indices_to_recalc;
}
@@ -500,7 +506,8 @@ struct MutationContext
std::unique_ptr num_mutations;
- BlockInputStreamPtr mutating_stream{nullptr}; // in
+ QueryPipeline mutating_pipeline; // in
+ std::unique_ptr mutating_executor;
Block updated_header;
std::unique_ptr interpreter;
@@ -795,24 +802,25 @@ void PartMergerWriter::prepare()
bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
{
- if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && (block = ctx->mutating_stream->read()))
+ Block cur_block;
+ if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block))
{
if (ctx->minmax_idx)
- ctx->minmax_idx->update(block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey()));
+ ctx->minmax_idx->update(cur_block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey()));
- ctx->out->write(block);
+ ctx->out->write(cur_block);
for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i)
{
const auto & projection = *ctx->projections_to_build[i];
- auto projection_block = projection_squashes[i].add(projection.calculate(block, ctx->context));
+ auto projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context));
if (projection_block)
projection_parts[projection.name].emplace_back(MergeTreeDataWriter::writeTempProjectionPart(
*ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num));
}
- (*ctx->mutate_entry)->rows_written += block.rows();
- (*ctx->mutate_entry)->bytes_written_uncompressed += block.bytes();
+ (*ctx->mutate_entry)->rows_written += cur_block.rows();
+ (*ctx->mutate_entry)->bytes_written_uncompressed += cur_block.bytes();
/// Need execute again
return true;
@@ -937,18 +945,25 @@ private:
auto skip_part_indices = MutationHelpers::getIndicesForNewDataPart(ctx->metadata_snapshot->getSecondaryIndices(), ctx->for_file_renames);
ctx->projections_to_build = MutationHelpers::getProjectionsForNewDataPart(ctx->metadata_snapshot->getProjections(), ctx->for_file_renames);
- if (ctx->mutating_stream == nullptr)
+ if (!ctx->mutating_pipeline.initialized())
throw Exception("Cannot mutate part columns with uninitialized mutations stream. It's a bug", ErrorCodes::LOGICAL_ERROR);
+ QueryPipelineBuilder builder;
+ builder.init(std::move(ctx->mutating_pipeline));
+
if (ctx->metadata_snapshot->hasPrimaryKey() || ctx->metadata_snapshot->hasSecondaryIndices())
- ctx->mutating_stream = std::make_shared(
- std::make_shared(ctx->mutating_stream, ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot)));
+ {
+ builder.addTransform(
+ std::make_shared(builder.getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot)));
+
+ builder.addTransform(std::make_shared(builder.getHeader()));
+ }
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
- ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
+ builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
- ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
+ builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
ctx->minmax_idx = std::make_shared();
@@ -959,7 +974,8 @@ private:
skip_part_indices,
ctx->compression_codec);
- ctx->mutating_stream->readPrefix();
+ ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
+ ctx->mutating_executor = std::make_unique(ctx->mutating_pipeline);
part_merger_writer_task = std::make_unique(ctx);
}
@@ -968,7 +984,8 @@ private:
void finalize()
{
ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx);
- ctx->mutating_stream->readSuffix();
+ ctx->mutating_executor.reset();
+ ctx->mutating_pipeline.reset();
static_pointer_cast(ctx->out)->writeSuffixAndFinalizePart(ctx->new_data_part, ctx->need_sync);
}
@@ -1087,16 +1104,16 @@ private:
ctx->compression_codec = ctx->source_part->default_codec;
- if (ctx->mutating_stream)
+ if (ctx->mutating_pipeline.initialized())
{
- if (ctx->mutating_stream == nullptr)
- throw Exception("Cannot mutate part columns with uninitialized mutations stream. It's a bug", ErrorCodes::LOGICAL_ERROR);
+ QueryPipelineBuilder builder;
+ builder.init(std::move(ctx->mutating_pipeline));
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
- ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
+ builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
- ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
+ builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
ctx->out = std::make_shared(
ctx->new_data_part,
@@ -1109,7 +1126,9 @@ private:
&ctx->source_part->index_granularity_info
);
- ctx->mutating_stream->readPrefix();
+ ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
+ ctx->mutating_executor = std::make_unique(ctx->mutating_pipeline);
+
ctx->projections_to_build = std::vector{ctx->projections_to_recalc.begin(), ctx->projections_to_recalc.end()};
part_merger_writer_task = std::make_unique(ctx);
@@ -1119,9 +1138,10 @@ private:
void finalize()
{
- if (ctx->mutating_stream)
+ if (ctx->mutating_executor)
{
- ctx->mutating_stream->readSuffix();
+ ctx->mutating_executor.reset();
+ ctx->mutating_pipeline.reset();
auto changed_checksums =
static_pointer_cast(ctx->out)->writeSuffixAndGetChecksums(
@@ -1267,9 +1287,9 @@ bool MutateTask::prepare()
ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices();
ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections();
ctx->mutation_kind = ctx->interpreter->getMutationKind();
- ctx->mutating_stream = ctx->interpreter->execute();
+ ctx->mutating_pipeline = ctx->interpreter->execute();
ctx->updated_header = ctx->interpreter->getUpdatedHeader();
- ctx->mutating_stream->setProgressCallback(MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress));
+ ctx->mutating_pipeline.setProgressCallback(MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress));
}
ctx->single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0);
@@ -1299,7 +1319,7 @@ bool MutateTask::prepare()
ctx->need_sync = needSyncPart(ctx->source_part->rows_count, ctx->source_part->getBytesOnDisk(), *data_settings);
ctx->execute_ttl_type = ExecuteTTLType::NONE;
- if (ctx->mutating_stream)
+ if (ctx->mutating_pipeline.initialized())
ctx->execute_ttl_type = MergeTreeDataMergerMutator::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies());
@@ -1318,7 +1338,7 @@ bool MutateTask::prepare()
ctx->updated_columns.emplace(name_type.name);
ctx->indices_to_recalc = MutationHelpers::getIndicesToRecalculate(
- ctx->mutating_stream, ctx->updated_columns, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices, ctx->source_part);
+ ctx->mutating_pipeline, ctx->updated_columns, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices, ctx->source_part);
ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(
ctx->updated_columns, ctx->metadata_snapshot, ctx->materialized_projections, ctx->source_part);
diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp
index e45183591f2..2acdba18c2d 100644
--- a/src/Storages/StorageJoin.cpp
+++ b/src/Storages/StorageJoin.cpp
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#include /// toLower
@@ -114,17 +115,16 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context)
{
auto storage_ptr = DatabaseCatalog::instance().getTable(getStorageID(), context);
auto interpreter = std::make_unique(storage_ptr, metadata_snapshot, commands, context, true);
- auto in = interpreter->execute();
- in->readPrefix();
+ auto pipeline = interpreter->execute();
+ PullingPipelineExecutor executor(pipeline);
- while (const Block & block = in->read())
+ Block block;
+ while (executor.pull(block))
{
new_data->addJoinedBlock(block, true);
if (persistent)
backup_stream.write(block);
}
-
- in->readSuffix();
}
/// Now acquire exclusive lock and modify storage.
diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 3fe6083ab13..299e39a3836 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
namespace DB
@@ -263,11 +264,12 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context
new_context->setSetting("max_threads", 1);
auto interpreter = std::make_unique(storage_ptr, metadata_snapshot, commands, new_context, true);
- auto in = interpreter->execute();
+ auto pipeline = interpreter->execute();
+ PullingPipelineExecutor executor(pipeline);
- in->readPrefix();
Blocks out;
- while (Block block = in->read())
+ Block block;
+ while (executor.pull(block))
{
if (compress)
for (auto & elem : block)
@@ -275,7 +277,6 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context
out.push_back(block);
}
- in->readSuffix();
std::unique_ptr new_data;
From 2bfcdc431b4f15d6f3610745db164b3296ecb8e8 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Wed, 6 Oct 2021 21:42:07 +0300
Subject: [PATCH 035/104] translate
---
docs/en/operations/settings/settings.md | 4 +--
.../statements/select/group-by.md | 2 ++
.../statements/select/limit-by.md | 3 ++
.../statements/select/order-by.md | 4 ++-
docs/ru/operations/settings/settings.md | 36 +++++++++++++++++++
.../statements/select/group-by.md | 2 ++
.../sql-reference/statements/select/index.md | 3 +-
.../statements/select/limit-by.md | 2 ++
.../statements/select/order-by.md | 4 ++-
9 files changed, 54 insertions(+), 6 deletions(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index fa4b96b8fbc..7868dfaaf79 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3756,8 +3756,8 @@ Enables or disables supporting positional arguments for [GROUP BY](../../sql-ref
Possible values:
-- 0 — Disabled.
-- 1 — Enabled.
+- 1 — Column numbers can use instead of column names.
+- 0 — Positional arguments aren't supported.
Default value: `0`.
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md
index 7c2d3a20f43..26dd51d806d 100644
--- a/docs/en/sql-reference/statements/select/group-by.md
+++ b/docs/en/sql-reference/statements/select/group-by.md
@@ -10,6 +10,8 @@ toc_title: GROUP BY
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
+When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+
!!! note "Note"
There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row.
diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md
index 34645b68b03..e13c46e0c29 100644
--- a/docs/en/sql-reference/statements/select/limit-by.md
+++ b/docs/en/sql-reference/statements/select/limit-by.md
@@ -16,6 +16,9 @@ During query processing, ClickHouse selects data ordered by sorting key. The sor
!!! note "Note"
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
+If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+
+
## Examples {#examples}
Sample table:
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index 030f04d5e83..8a281a8c16f 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -4,7 +4,9 @@ toc_title: ORDER BY
# ORDER BY Clause {#select-order-by}
-The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`
+The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`.
+
+If you want to sort by column numbers instead of column names, set `enable_positional_arguments = 1`.
Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time).
If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index e639c0a0df2..430a26f71d1 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -3538,3 +3538,39 @@ Exception: Total regexp lengths too large.
**См. также**
- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length)
+
+## enable_positional_arguments {#enable-positional-arguments}
+
+Включает и отключает поддержку позиционных аргументов для [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md). Если вы хотите использовать номера столбцов вместо названий в выражениях этих операторов, установите `enable_positional_arguments = 1`.
+
+Возможные значения:
+
+- 1 — Можно использовать номера столбцов вместо названий столбцов.
+- 0 — Позиционные аргументы не поддерживаются.
+
+Значение по умолчанию: `0`.
+
+**Пример**
+
+Запрос:
+
+```sql
+CREATE TABLE positional_arguments(one Int, two Int, three Int) ENGINE=Memory();
+
+INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
+
+SET enable_positional_arguments = 1;
+
+SELECT * FROM positional_arguments ORDER BY 2,3;
+```
+
+Результат:
+
+```text
+┌─one─┬─two─┬─three─┐
+│ 30 │ 10 │ 20 │
+│ 20 │ 20 │ 10 │
+│ 10 │ 20 │ 30 │
+└─────┴─────┴───────┘
+
+```
\ No newline at end of file
diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md
index 2f0cabd14fb..ac06636e79f 100644
--- a/docs/ru/sql-reference/statements/select/group-by.md
+++ b/docs/ru/sql-reference/statements/select/group-by.md
@@ -10,6 +10,8 @@ toc_title: GROUP BY
- Все выражения в секциях [SELECT](index.md), [HAVING](having.md), и [ORDER BY](order-by.md) статьи **должны** быть вычисленными на основе ключевых выражений **или** на [агрегатных функций](../../../sql-reference/aggregate-functions/index.md) над неключевыми выражениями (включая столбцы). Другими словами, каждый столбец, выбранный из таблицы, должен использоваться либо в ключевом выражении, либо внутри агрегатной функции, но не в обоих.
- В результате агрегирования `SELECT` запрос будет содержать столько строк, сколько было уникальных значений ключа группировки в исходной таблице. Обычно агрегация значительно уменьшает количество строк, часто на порядки, но не обязательно: количество строк остается неизменным, если все исходные значения ключа группировки ценности были различны.
+Если вы хотите группировать данные в таблице по номерам столбцов, а не по названиям, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+
!!! note "Примечание"
Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку.
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index c2820bc7be4..f4e8a3a24b0 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -140,8 +140,7 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
Вы можете использовать синонимы (алиасы `AS`) в любом месте запроса.
-В секциях `GROUP BY`, `ORDER BY`, в отличие от диалекта MySQL, и в соответствии со стандартным SQL, не поддерживаются позиционные аргументы.
-Например, если вы напишите `GROUP BY 1, 2` - то это будет воспринято, как группировка по константам (то есть, агрегация всех строк в одну).
+В секциях `GROUP BY`, `ORDER BY` и `LIMIT BY` поддерживаются позиционные аргументы. Например, если вы напишите `ORDER BY 1,2` - будет выполнена сортировка сначала по первому, а затем по второму столбцу.
## Детали реализации {#implementation-details}
diff --git a/docs/ru/sql-reference/statements/select/limit-by.md b/docs/ru/sql-reference/statements/select/limit-by.md
index fba81c023b5..861d88dcafb 100644
--- a/docs/ru/sql-reference/statements/select/limit-by.md
+++ b/docs/ru/sql-reference/statements/select/limit-by.md
@@ -15,6 +15,8 @@ ClickHouse поддерживает следующий синтаксис:
`LIMIT BY` не связана с секцией `LIMIT`. Их можно использовать в одном запросе.
+Если вы хотите использовать в секции `LIMIT BY` номера столбцов вместо названий, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+
## Примеры
Образец таблицы:
diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md
index d7d2e9c7574..fca5653d0bc 100644
--- a/docs/ru/sql-reference/statements/select/order-by.md
+++ b/docs/ru/sql-reference/statements/select/order-by.md
@@ -4,7 +4,9 @@ toc_title: ORDER BY
# Секция ORDER BY {#select-order-by}
-Секция `ORDER BY` содержит список выражений, к каждому из которых также может быть приписано `DESC` или `ASC` (направление сортировки). Если ничего не приписано - это аналогично приписыванию `ASC`. `ASC` - сортировка по возрастанию, `DESC` - сортировка по убыванию. Обозначение направления сортировки действует на одно выражение, а не на весь список. Пример: `ORDER BY Visits DESC, SearchPhrase`
+Секция `ORDER BY` содержит список выражений, к каждому из которых также может быть приписано `DESC` или `ASC` (направление сортировки). Если ничего не приписано - это аналогично приписыванию `ASC`. `ASC` - сортировка по возрастанию, `DESC` - сортировка по убыванию. Обозначение направления сортировки действует на одно выражение, а не на весь список. Пример: `ORDER BY Visits DESC, SearchPhrase`.
+
+Если вы хотите сортировать данные по номерам столбцов, а не по названиям, установите настройку `enable_positional_arguments = 1`.
Строки, для которых список выражений, по которым производится сортировка, принимает одинаковые значения, выводятся в произвольном порядке, который может быть также недетерминированным (каждый раз разным).
Если секция ORDER BY отсутствует, то, аналогично, порядок, в котором идут строки, не определён, и может быть недетерминированным.
From 88c4a9030cd47f3e595bc6b4c2c33655a1206eb0 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:25:44 +0300
Subject: [PATCH 036/104] Update docs/en/operations/settings/settings.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/operations/settings/settings.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 7868dfaaf79..ff0ce28a5ba 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3756,8 +3756,8 @@ Enables or disables supporting positional arguments for [GROUP BY](../../sql-ref
Possible values:
-- 1 — Column numbers can use instead of column names.
- 0 — Positional arguments aren't supported.
+- 1 — Positional arguments are supported: column numbers can be used instead of column names.
Default value: `0`.
From 21197e1820f9c9e7b43b606dc8e4c0fc7f0a4d7d Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:26:07 +0300
Subject: [PATCH 037/104] Update docs/en/operations/settings/settings.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/operations/settings/settings.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index ff0ce28a5ba..f78fbc8a2bc 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3783,5 +3783,4 @@ Result:
│ 20 │ 20 │ 10 │
│ 10 │ 20 │ 30 │
└─────┴─────┴───────┘
-
```
\ No newline at end of file
From c70ce36ff40a75b031922f0d06e05194bd4587f5 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:27:00 +0300
Subject: [PATCH 038/104] Update
docs/en/sql-reference/statements/select/index.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/sql-reference/statements/select/index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index e31ba6b1f4b..0427764475a 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -144,7 +144,7 @@ Extreme values are calculated for rows before `LIMIT`, but after `LIMIT BY`. How
You can use synonyms (`AS` aliases) in any part of a query.
-The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses support positional arguments. For example, `ORDER BY 1,2` will be sorting rows in the table on the first and then the second column.
+The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses can support positional arguments. To enable this, switch on the [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) setting. Then, for example, `ORDER BY 1,2` will sort rows in the table by the first and then the second column.
## Implementation Details {#implementation-details}
From 0ed02e689dc939939d95b2adccb3a0be8028237c Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:27:17 +0300
Subject: [PATCH 039/104] Update
docs/en/sql-reference/statements/select/limit-by.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/sql-reference/statements/select/limit-by.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md
index e13c46e0c29..e1ca58cdec8 100644
--- a/docs/en/sql-reference/statements/select/limit-by.md
+++ b/docs/en/sql-reference/statements/select/limit-by.md
@@ -16,7 +16,7 @@ During query processing, ClickHouse selects data ordered by sorting key. The sor
!!! note "Note"
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
-If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
## Examples {#examples}
From 5aef3e761792a2bf4d60e6ad81d6d8a26e8f7f49 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:27:30 +0300
Subject: [PATCH 040/104] Update docs/ru/operations/settings/settings.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/ru/operations/settings/settings.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 430a26f71d1..50376b64e5e 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -3545,8 +3545,8 @@ Exception: Total regexp lengths too large.
Возможные значения:
-- 1 — Можно использовать номера столбцов вместо названий столбцов.
- 0 — Позиционные аргументы не поддерживаются.
+- 1 — Позиционные аргументы поддерживаются: можно использовать номера столбцов вместо названий столбцов.
Значение по умолчанию: `0`.
From f3e0edd9a98cc97f332d1a51a32699c6bd55348d Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:27:40 +0300
Subject: [PATCH 041/104] Update docs/ru/operations/settings/settings.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/ru/operations/settings/settings.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 50376b64e5e..500485aea2f 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -3572,5 +3572,4 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
│ 20 │ 20 │ 10 │
│ 10 │ 20 │ 30 │
└─────┴─────┴───────┘
-
```
\ No newline at end of file
From 6b927f1bda3ebefe819ccb9db4b119e0b60065d6 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:28:10 +0300
Subject: [PATCH 042/104] Update
docs/ru/sql-reference/statements/select/group-by.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/ru/sql-reference/statements/select/group-by.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md
index ac06636e79f..8bc1b765ad3 100644
--- a/docs/ru/sql-reference/statements/select/group-by.md
+++ b/docs/ru/sql-reference/statements/select/group-by.md
@@ -10,7 +10,7 @@ toc_title: GROUP BY
- Все выражения в секциях [SELECT](index.md), [HAVING](having.md), и [ORDER BY](order-by.md) статьи **должны** быть вычисленными на основе ключевых выражений **или** на [агрегатных функций](../../../sql-reference/aggregate-functions/index.md) над неключевыми выражениями (включая столбцы). Другими словами, каждый столбец, выбранный из таблицы, должен использоваться либо в ключевом выражении, либо внутри агрегатной функции, но не в обоих.
- В результате агрегирования `SELECT` запрос будет содержать столько строк, сколько было уникальных значений ключа группировки в исходной таблице. Обычно агрегация значительно уменьшает количество строк, часто на порядки, но не обязательно: количество строк остается неизменным, если все исходные значения ключа группировки ценности были различны.
-Если вы хотите группировать данные в таблице по номерам столбцов, а не по названиям, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
+Если вы хотите для группировки данных в таблице указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
!!! note "Примечание"
Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку.
From fc627bc096fc33ec1c5aae65e20be965136fa51d Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:28:37 +0300
Subject: [PATCH 043/104] Update
docs/ru/sql-reference/statements/select/order-by.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/ru/sql-reference/statements/select/order-by.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md
index fca5653d0bc..190a46dacc9 100644
--- a/docs/ru/sql-reference/statements/select/order-by.md
+++ b/docs/ru/sql-reference/statements/select/order-by.md
@@ -6,7 +6,7 @@ toc_title: ORDER BY
Секция `ORDER BY` содержит список выражений, к каждому из которых также может быть приписано `DESC` или `ASC` (направление сортировки). Если ничего не приписано - это аналогично приписыванию `ASC`. `ASC` - сортировка по возрастанию, `DESC` - сортировка по убыванию. Обозначение направления сортировки действует на одно выражение, а не на весь список. Пример: `ORDER BY Visits DESC, SearchPhrase`.
-Если вы хотите сортировать данные по номерам столбцов, а не по названиям, установите настройку `enable_positional_arguments = 1`.
+Если вы хотите для сортировки данных указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
Строки, для которых список выражений, по которым производится сортировка, принимает одинаковые значения, выводятся в произвольном порядке, который может быть также недетерминированным (каждый раз разным).
Если секция ORDER BY отсутствует, то, аналогично, порядок, в котором идут строки, не определён, и может быть недетерминированным.
From a96cae23244cccb77b3bf012aac9239221f5dd3f Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:28:57 +0300
Subject: [PATCH 044/104] Update
docs/ru/sql-reference/statements/select/index.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/ru/sql-reference/statements/select/index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index f4e8a3a24b0..ffaae74b1d9 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -140,7 +140,7 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
Вы можете использовать синонимы (алиасы `AS`) в любом месте запроса.
-В секциях `GROUP BY`, `ORDER BY` и `LIMIT BY` поддерживаются позиционные аргументы. Например, если вы напишите `ORDER BY 1,2` - будет выполнена сортировка сначала по первому, а затем по второму столбцу.
+В секциях `GROUP BY`, `ORDER BY` и `LIMIT BY` можно использовать не названия столбцов, а номера. Для этого нужно включить настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). Тогда, например, в запросе с `ORDER BY 1,2` будет выполнена сортировка сначала по первому, а затем по второму столбцу.
## Детали реализации {#implementation-details}
From 0362083843ed014f0ba58a026d60875fcf80bf07 Mon Sep 17 00:00:00 2001
From: Tatiana Kirillova
Date: Thu, 7 Oct 2021 08:29:28 +0300
Subject: [PATCH 045/104] Update
docs/en/sql-reference/statements/select/order-by.md
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
docs/en/sql-reference/statements/select/order-by.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index 8a281a8c16f..ee6893812cc 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -6,7 +6,7 @@ toc_title: ORDER BY
The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`.
-If you want to sort by column numbers instead of column names, set `enable_positional_arguments = 1`.
+If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time).
If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well.
From 71cffbf521324ff2613d0e6b3fc2b6e4642fdc45 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 10:51:03 +0300
Subject: [PATCH 046/104] Make memory_profiler_step API cleaner
Right now to configure memory_profiler_step/total_memory_profiler_step
you need to call:
MemoryTracker::setOrRaiseProfilerLimit()
MemoryTracker::setProfilerStep()
But it is easy to forget about setOrRaiseProfilerLimit(), since there are
not even any comments about this.
So instead, make setOrRaiseProfilerLimit() private and call it from
setProfilerStep()
---
programs/server/Server.cpp | 1 -
src/Common/MemoryTracker.h | 4 +++-
src/Interpreters/ProcessList.cpp | 1 -
src/Storages/MergeTree/MergeList.cpp | 1 -
4 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index cd5d72cfba4..4ed5b114082 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1159,7 +1159,6 @@ if (ThreadFuzzer::instance().isEffective())
UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
if (total_memory_profiler_step)
{
- total_memory_tracker.setOrRaiseProfilerLimit(total_memory_profiler_step);
total_memory_tracker.setProfilerStep(total_memory_profiler_step);
}
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 36560fec334..8ca8e4a6455 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -61,6 +61,8 @@ private:
void updatePeak(Int64 will_be, bool log_memory_usage);
void logMemoryUsage(Int64 current) const;
+ void setOrRaiseProfilerLimit(Int64 value);
+
public:
explicit MemoryTracker(VariableContext level_ = VariableContext::Thread);
explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread);
@@ -106,7 +108,6 @@ public:
* Otherwise, set limit to new value, if new value is greater than previous limit.
*/
void setOrRaiseHardLimit(Int64 value);
- void setOrRaiseProfilerLimit(Int64 value);
void setFaultProbability(double value)
{
@@ -121,6 +122,7 @@ public:
void setProfilerStep(Int64 value)
{
profiler_step = value;
+ setOrRaiseProfilerLimit(value);
}
/// next should be changed only once: from nullptr to some value.
diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp
index 8576f4662ec..f8402cf0287 100644
--- a/src/Interpreters/ProcessList.cpp
+++ b/src/Interpreters/ProcessList.cpp
@@ -203,7 +203,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as
if (query_context->hasTraceCollector())
{
/// Set up memory profiling
- thread_group->memory_tracker.setOrRaiseProfilerLimit(settings.memory_profiler_step);
thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
}
diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index cd2668988a8..07aabf64dfd 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -82,7 +82,6 @@ MergeListElement::MergeListElement(
memory_tracker.setDescription("Mutate/Merge");
memory_tracker.setProfilerStep(memory_profiler_step);
- memory_tracker.setOrRaiseProfilerLimit(memory_profiler_step);
memory_tracker.setSampleProbability(memory_profiler_sample_probability);
}
From 298f613557ceff080498b8f4fff6a2abb211051b Mon Sep 17 00:00:00 2001
From: Shoh Jahon <16575814+Shohjahon@users.noreply.github.com>
Date: Thu, 7 Oct 2021 14:59:23 +0500
Subject: [PATCH 047/104] Update fetchPostgreSQLTableStructure.cpp
Added relkind 'p' (partitioned tables) to the query that fetches the replica identity index @kssenii !
---
src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 4f5743035a7..5ac4180ec27 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -250,7 +250,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
"and i.oid = ix.indexrelid "
"and a.attrelid = t.oid "
"and a.attnum = ANY(ix.indkey) "
- "and t.relkind = 'r' " /// simple tables
+ "and t.relkind in ('r', 'p') " /// ordinary and partitioned tables
"and t.relname = {} " /// Connection is already done to a needed database, only table name is needed.
"and ix.indisreplident = 't' " /// index is replica identity index
"ORDER BY a.attname", /// column names
From 4119848a41512caf641c0f190d1caee245382a9f Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 6 Oct 2021 20:07:35 +0300
Subject: [PATCH 048/104] better diagnostic for OPTIMIZE
---
.../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +
src/Storages/StorageReplicatedMergeTree.cpp | 200 ++++++++----------
2 files changed, 96 insertions(+), 110 deletions(-)
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 9885a10cd62..b35a41d5d19 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -328,7 +328,11 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id);
if (parts.empty())
+ {
+ if (out_disable_reason)
+ *out_disable_reason = "There are no parts inside partition";
return SelectPartsDecision::CANNOT_SELECT;
+ }
if (!final && parts.size() == 1)
{
@@ -342,6 +346,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 &&
(!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot)))
{
+ if (out_disable_reason)
+ *out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions";
return SelectPartsDecision::NOTHING_TO_MERGE;
}
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index e5d59f2a950..06bf06b0ef3 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4203,143 +4203,123 @@ bool StorageReplicatedMergeTree::optimize(
if (!is_leader)
throw Exception("OPTIMIZE cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER);
- constexpr size_t max_retries = 10;
-
- std::vector merge_entries;
+ auto handle_noop = [&] (const String & message)
{
- auto zookeeper = getZooKeeper();
+ if (query_context->getSettingsRef().optimize_throw_if_noop)
+ throw Exception(message, ErrorCodes::CANNOT_ASSIGN_OPTIMIZE);
+ return false;
+ };
- auto handle_noop = [&] (const String & message)
+ auto zookeeper = getZooKeeper();
+ UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace();
+ const auto storage_settings_ptr = getSettings();
+ auto metadata_snapshot = getInMemoryMetadataPtr();
+ std::vector merge_entries;
+
+ auto try_assign_merge = [&](const String & partition_id) -> bool
+ {
+ constexpr size_t max_retries = 10;
+ size_t try_no = 0;
+ for (; try_no < max_retries; ++try_no)
{
- if (query_context->getSettingsRef().optimize_throw_if_noop)
- throw Exception(message, ErrorCodes::CANNOT_ASSIGN_OPTIMIZE);
- return false;
- };
+ /// We must select parts for merge under merge_selecting_mutex because other threads
+ /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges.
+ std::lock_guard merge_selecting_lock(merge_selecting_mutex);
+ ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper);
- const auto storage_settings_ptr = getSettings();
- auto metadata_snapshot = getInMemoryMetadataPtr();
+ auto future_merged_part = std::make_shared();
+ if (storage_settings.get()->assign_part_uuids)
+ future_merged_part->uuid = UUIDHelpers::generateV4();
- if (!partition && final)
- {
- DataPartsVector data_parts = getDataPartsVector();
- std::unordered_set partition_ids;
+ constexpr const char * unknown_disable_reason = "unknown reason";
+ String disable_reason = unknown_disable_reason;
+ SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT;
- for (const DataPartPtr & part : data_parts)
- partition_ids.emplace(part->info.partition_id);
-
- UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace();
-
- for (const String & partition_id : partition_ids)
+ if (partition_id.empty())
{
- size_t try_no = 0;
- for (; try_no < max_retries; ++try_no)
- {
- /// We must select parts for merge under merge_selecting_mutex because other threads
- /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges.
- std::lock_guard merge_selecting_lock(merge_selecting_mutex);
- ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper);
-
- auto future_merged_part = std::make_shared();
-
- if (storage_settings.get()->assign_part_uuids)
- future_merged_part->uuid = UUIDHelpers::generateV4();
-
- SelectPartsDecision select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
- future_merged_part, disk_space, can_merge, partition_id, true, metadata_snapshot, nullptr, query_context->getSettingsRef().optimize_skip_merged_partitions);
-
- if (select_decision != SelectPartsDecision::SELECTED)
- break;
-
- ReplicatedMergeTreeLogEntryData merge_entry;
- CreateMergeEntryResult create_result = createLogEntryToMergeParts(
- zookeeper, future_merged_part->parts,
- future_merged_part->name, future_merged_part->uuid, future_merged_part->type,
- deduplicate, deduplicate_by_columns,
- &merge_entry, can_merge.getVersion(), future_merged_part->merge_type);
-
- if (create_result == CreateMergeEntryResult::MissingPart)
- return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing");
-
- if (create_result == CreateMergeEntryResult::LogUpdated)
- continue;
-
- merge_entries.push_back(std::move(merge_entry));
- break;
- }
- if (try_no == max_retries)
- return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of "
- + toString(max_retries) + " tries");
+ select_decision = merger_mutator.selectPartsToMerge(
+ future_merged_part, /* aggressive */ true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool,
+ can_merge, /* merge_with_ttl_allowed */ false, &disable_reason);
}
- }
- else
- {
- size_t try_no = 0;
- for (; try_no < max_retries; ++try_no)
+ else
{
- std::lock_guard merge_selecting_lock(merge_selecting_mutex);
- ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper);
+ select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
+ future_merged_part, disk_space, can_merge, partition_id, final, metadata_snapshot,
+ &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions);
+ }
- auto future_merged_part = std::make_shared();
- if (storage_settings.get()->assign_part_uuids)
- future_merged_part->uuid = UUIDHelpers::generateV4();
+ /// If there is nothing to merge then we treat this merge as successful (needed for optimize final optimization)
+ if (select_decision == SelectPartsDecision::NOTHING_TO_MERGE)
+ return false;
- String disable_reason;
- SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT;
+ if (select_decision != SelectPartsDecision::SELECTED)
+ {
+ constexpr const char * message_fmt = "Cannot select parts for optimization: {}";
+ assert(disable_reason != unknown_disable_reason);
+ if (!partition_id.empty())
+ disable_reason += fmt::format(" (in partition {})", partition_id);
+ String message = fmt::format(message_fmt, disable_reason);
+ LOG_INFO(log, message);
+ return handle_noop(message);
+ }
- if (!partition)
- {
- select_decision = merger_mutator.selectPartsToMerge(
- future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, false, &disable_reason);
- }
- else
- {
- UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace();
- String partition_id = getPartitionIDFromQuery(partition, query_context);
- select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
- future_merged_part, disk_space, can_merge, partition_id, final, metadata_snapshot, &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions);
- }
+ ReplicatedMergeTreeLogEntryData merge_entry;
+ CreateMergeEntryResult create_result = createLogEntryToMergeParts(
+ zookeeper, future_merged_part->parts,
+ future_merged_part->name, future_merged_part->uuid, future_merged_part->type,
+ deduplicate, deduplicate_by_columns,
+ &merge_entry, can_merge.getVersion(), future_merged_part->merge_type);
- /// If there is nothing to merge then we treat this merge as successful (needed for optimize final optimization)
- if (select_decision == SelectPartsDecision::NOTHING_TO_MERGE)
- break;
+ if (create_result == CreateMergeEntryResult::MissingPart)
+ {
+ String message = "Can't create merge queue node in ZooKeeper, because some parts are missing";
+ LOG_TRACE(log, message);
+ return handle_noop(message);
+ }
- if (select_decision != SelectPartsDecision::SELECTED)
- {
- constexpr const char * message_fmt = "Cannot select parts for optimization: {}";
- if (disable_reason.empty())
- disable_reason = "unknown reason";
- LOG_INFO(log, message_fmt, disable_reason);
- return handle_noop(fmt::format(message_fmt, disable_reason));
- }
+ if (create_result == CreateMergeEntryResult::LogUpdated)
+ continue;
- ReplicatedMergeTreeLogEntryData merge_entry;
- CreateMergeEntryResult create_result = createLogEntryToMergeParts(
- zookeeper, future_merged_part->parts,
- future_merged_part->name, future_merged_part->uuid, future_merged_part->type,
- deduplicate, deduplicate_by_columns,
- &merge_entry, can_merge.getVersion(), future_merged_part->merge_type);
+ merge_entries.push_back(std::move(merge_entry));
+ return true;
+ }
- if (create_result == CreateMergeEntryResult::MissingPart)
- return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing");
+ assert(try_no == max_retries);
+ String message = fmt::format("Can't create merge queue node in ZooKeeper, because log was updated in every of {} tries", try_no);
+ LOG_TRACE(log, message);
+ return handle_noop(message);
+ };
- if (create_result == CreateMergeEntryResult::LogUpdated)
- continue;
+ bool assigned = false;
+ if (!partition && final)
+ {
+ DataPartsVector data_parts = getDataPartsVector();
+ std::unordered_set partition_ids;
- merge_entries.push_back(std::move(merge_entry));
+ for (const DataPartPtr & part : data_parts)
+ partition_ids.emplace(part->info.partition_id);
+
+ for (const String & partition_id : partition_ids)
+ {
+ assigned = try_assign_merge(partition_id);
+ if (!assigned)
break;
- }
- if (try_no == max_retries)
- return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of "
- + toString(max_retries) + " tries");
}
}
+ else
+ {
+ String partition_id;
+ if (partition)
+ partition_id = getPartitionIDFromQuery(partition, query_context);
+ assigned = try_assign_merge(partition_id);
+ }
table_lock.reset();
for (auto & merge_entry : merge_entries)
waitForLogEntryToBeProcessedIfNecessary(merge_entry, query_context);
- return true;
+ return assigned;
}
bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMergeTree::LogEntry & entry)
From 0db1e3614c82777de8f4f389b2cb9c06146b7534 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 7 Oct 2021 13:21:42 +0300
Subject: [PATCH 049/104] fix test
---
tests/integration/test_dictionaries_dependency/test.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py
index ceab39aacb8..9b1019822e3 100644
--- a/tests/integration/test_dictionaries_dependency/test.py
+++ b/tests/integration/test_dictionaries_dependency/test.py
@@ -106,8 +106,11 @@ def test_dependency_via_dictionary_database(node):
for d_name in d_names:
assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n"
- check()
+
+ for d_name in d_names:
+ assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n"
# Restart must not break anything.
node.restart_clickhouse()
- check()
+ for d_name in d_names:
+ assert node.query_with_retry("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n"
From 7e2bc184ec29358cce749059bf776eccc784231e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 7 Oct 2021 16:43:49 +0300
Subject: [PATCH 050/104] fix other suspicious places, add test
---
src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 2 +-
src/Parsers/ParserCreateQuery.cpp | 2 +-
src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +-
tests/queries/0_stateless/01188_attach_table_from_path.sql | 1 +
4 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
index f2860235117..a96713e3b5d 100644
--- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
+++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
@@ -120,7 +120,7 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini
auto * literal = child->as();
new_child->arguments = std::make_shared();
- new_child->arguments->children.push_back(std::make_shared(literal->value.get()));
+ new_child->arguments->children.push_back(std::make_shared(literal->value.safeGet()));
new_child->arguments->children.push_back(std::make_shared(Int16(++i)));
child = new_child;
}
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 2ea1663fc80..1da1bfba491 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -896,7 +896,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (ParserKeyword{"TO INNER UUID"}.ignore(pos, expected))
{
- ParserLiteral literal_p;
+ ParserStringLiteral literal_p;
if (!literal_p.parse(pos, to_inner_uuid, expected))
return false;
}
diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
index 024b87c9a3e..60b9ddae329 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
@@ -461,7 +461,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi
[this](const auto & arg) { return checkASTUseless(arg, true); });
}
else if (const auto * literal = node->as())
- return !atomic && literal->value.get();
+ return !atomic && literal->value.safeGet();
else if (const auto * identifier = node->as())
return key_columns.find(identifier->getColumnName()) == std::end(key_columns);
else
diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql
index 5b99c07e986..9bf401c8ea4 100644
--- a/tests/queries/0_stateless/01188_attach_table_from_path.sql
+++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql
@@ -7,6 +7,7 @@ drop table if exists mt;
attach table test from 'some/path' (n UInt8) engine=Memory; -- { serverError 48 }
attach table test from '/etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 }
attach table test from '../../../../../../../../../etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 }
+attach table test from 42 (s String) engine=File(TSVRaw); -- { clientError 62 }
insert into table function file('01188_attach/file/data.TSV', 'TSV', 's String, n UInt8') values ('file', 42);
attach table file from '01188_attach/file' (s String, n UInt8) engine=File(TSV);
From d24bfce93fda2d35360213adc3f90936d8cab010 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 7 Oct 2021 17:03:54 +0300
Subject: [PATCH 051/104] Add coroutines example.
---
src/Core/examples/CMakeLists.txt | 3 +
src/Core/examples/coro.cpp | 202 +++++++++++++++++++++++++++++++
2 files changed, 205 insertions(+)
create mode 100644 src/Core/examples/coro.cpp
diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt
index 6b07dfbbfa6..c8846eb1743 100644
--- a/src/Core/examples/CMakeLists.txt
+++ b/src/Core/examples/CMakeLists.txt
@@ -13,3 +13,6 @@ target_link_libraries (mysql_protocol PRIVATE dbms)
if(USE_SSL)
target_include_directories (mysql_protocol SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR})
endif()
+
+add_executable (coro coro.cpp)
+target_link_libraries (coro PRIVATE clickhouse_common_io)
diff --git a/src/Core/examples/coro.cpp b/src/Core/examples/coro.cpp
new file mode 100644
index 00000000000..c8e2f7418e4
--- /dev/null
+++ b/src/Core/examples/coro.cpp
@@ -0,0 +1,202 @@
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#if defined(__clang__)
+
+#include
+
+template
+using coroutine_handle = std::experimental::coroutine_handle;
+
+using default_coroutine_handle = std::experimental::coroutine_handle<>;
+
+using suspend_never = std::experimental::suspend_never;
+using suspend_always = std::experimental::suspend_always;
+
+#else
+
+#include
+
+template
+using coroutine_handle = std::coroutine_handle;
+
+using default_coroutine_handle = std::coroutine_handle<>;
+
+using suspend_never = std::suspend_never;
+using suspend_always = std::suspend_always;
+
+#endif
+
+
+template
+struct suspend_never_val
+{
+ constexpr bool await_ready() const noexcept { return true; }
+ constexpr void await_suspend(default_coroutine_handle) const noexcept {}
+ constexpr T await_resume() const noexcept
+ {
+ std::cout << " ret " << val << std::endl;
+ return val;
+ }
+
+ T val;
+};
+
+template
+struct resumable
+{
+ struct promise_type
+ {
+ using coro_handle = coroutine_handle;
+ auto get_return_object() { return coro_handle::from_promise(*this); }
+ auto initial_suspend() { return suspend_never(); }
+ auto final_suspend() noexcept { return suspend_never_val{*r->value}; }
+ //void return_void() {}
+ void return_value(T value_) { r->value = value_; }
+ void unhandled_exception()
+ {
+ DB::tryLogCurrentException("Logger");
+ r->exception = std::current_exception();
+ }
+
+ explicit promise_type(std::string tag_) : tag(tag_) {}
+ ~promise_type() { std::cout << "~promise_type " << tag << std::endl; }
+ std::string tag;
+ coro_handle next;
+ resumable * r = nullptr;
+ };
+
+ using coro_handle = coroutine_handle;
+
+ bool await_ready() const noexcept { return false; }
+ void await_suspend(coro_handle g) noexcept
+ {
+ std::cout << " await_suspend " << my.promise().tag << std::endl;
+ std::cout << " g tag " << g.promise().tag << std::endl;
+ g.promise().next = my;
+ }
+ T await_resume() noexcept
+ {
+ std::cout << " await_res " << my.promise().tag << std::endl;
+ return *value;
+ }
+
+ resumable(coro_handle handle) : my(handle), tag(handle.promise().tag)
+ {
+ assert(handle);
+ my.promise().r = this;
+ std::cout << " resumable " << tag << std::endl;
+ }
+ resumable(resumable &) = delete;
+ resumable(resumable &&rhs) : my(rhs.my), tag(rhs.tag)
+ {
+ rhs.my = {};
+ std::cout << " resumable&& " << tag << std::endl;
+ }
+ static bool resume_impl(resumable *r)
+ {
+ if (r->value)
+ return false;
+
+ auto & next = r->my.promise().next;
+
+ if (next)
+ {
+ if (resume_impl(next.promise().r))
+ return true;
+ next = {};
+ }
+
+ if (!r->value)
+ {
+ r->my.resume();
+ if (r->exception)
+ std::rethrow_exception(r->exception);
+ }
+ return !r->value;
+ }
+
+ bool resume()
+ {
+ return resume_impl(this);
+ }
+
+ T res()
+ {
+ return *value;
+ }
+
+ ~resumable()
+ {
+ std::cout << " ~resumable " << tag << std::endl;
+ }
+
+private:
+ coro_handle my;
+ std::string tag;
+ std::optional value;
+ std::exception_ptr exception;
+};
+
+resumable boo(std::string tag)
+{
+ std::cout << "x" << std::endl;
+ co_await suspend_always();
+ std::cout << StackTrace().toString();
+ std::cout << "y" << std::endl;
+ co_return 1;
+}
+
+resumable bar(std::string tag)
+{
+ std::cout << "a" << std::endl;
+ int res1 = co_await boo("boo1");
+ std::cout << "b " << res1 << std::endl;
+ int res2 = co_await boo("boo2");
+ if (res2 == 1)
+ throw DB::Exception(1, "hello");
+ std::cout << "c " << res2 << std::endl;
+ co_return res1 + res2; // 1 + 1 = 2
+}
+
+resumable foo(std::string tag) {
+ std::cout << "Hello" << std::endl;
+ auto res1 = co_await bar("bar1");
+ std::cout << "Coro " << res1 << std::endl;
+ auto res2 = co_await bar("bar2");
+ std::cout << "World " << res2 << std::endl;
+ co_return res1 * res2; // 2 * 2 = 4
+}
+
+int main()
+{
+ Poco::AutoPtr app_channel(new Poco::ConsoleChannel(std::cerr));
+ Poco::Logger::root().setChannel(app_channel);
+ Poco::Logger::root().setLevel("trace");
+
+ LOG_INFO(&Poco::Logger::get(""), "Starting");
+
+ try
+ {
+ auto t = foo("foo");
+ std::cout << ".. started" << std::endl;
+ while (t.resume())
+ std::cout << ".. yielded" << std::endl;
+ std::cout << ".. done: " << t.res() << std::endl;
+ }
+ catch (DB::Exception & e)
+ {
+ std::cout << "Got exception " << e.what() << std::endl;
+ std::cout << e.getStackTraceString() << std::endl;
+ }
+}
From 213d68d9958855c07f5551e617eaa091af6fa36e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 7 Oct 2021 20:27:13 +0300
Subject: [PATCH 052/104] Fix some tests.
---
.../DistinctSortedBlockInputStream.cpp | 3 +++
src/DataStreams/TTLBlockInputStream.cpp | 24 ++++++++++++-----
src/DataStreams/TTLBlockInputStream.h | 7 ++---
src/DataStreams/TTLCalcInputStream.cpp | 26 +++++++++++++++----
src/DataStreams/TTLCalcInputStream.h | 7 ++---
5 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/src/DataStreams/DistinctSortedBlockInputStream.cpp b/src/DataStreams/DistinctSortedBlockInputStream.cpp
index a3105d6330c..47421941b45 100644
--- a/src/DataStreams/DistinctSortedBlockInputStream.cpp
+++ b/src/DataStreams/DistinctSortedBlockInputStream.cpp
@@ -47,7 +47,10 @@ void DistinctSortedTransform::transform(Chunk & chunk)
/// Just go to the next block if there isn't any new record in the current one.
if (!has_new_data)
+ {
+ chunk.clear();
return;
+ }
if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED))
{
diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 1a1484fc08e..b476f689e60 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -23,7 +23,7 @@ TTLTransform::TTLTransform(
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_,
bool force_)
- : ISimpleTransform(header_, header_, false)
+ : IAccumulatingTransform(header_, header_)
, data_part(data_part_)
, log(&Poco::Logger::get(storage_.getLogName() + " (TTLTransform)"))
{
@@ -97,16 +97,16 @@ Block reorderColumns(Block block, const Block & header)
return res;
}
-void TTLTransform::transform(Chunk & chunk)
+void TTLTransform::consume(Chunk chunk)
{
if (all_data_dropped)
{
- stopReading();
- chunk.clear();
+ finishConsume();
return;
}
auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
+
for (const auto & algorithm : algorithms)
algorithm->execute(block);
@@ -114,8 +114,20 @@ void TTLTransform::transform(Chunk & chunk)
return;
size_t num_rows = block.rows();
+ setReadyChunk(Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows));
+}
- chunk = Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows);
+Chunk TTLTransform::generate()
+{
+ Block block;
+ for (const auto & algorithm : algorithms)
+ algorithm->execute(block);
+
+ if (!block)
+ return {};
+
+ size_t num_rows = block.rows();
+ return Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows);
}
void TTLTransform::finalize()
@@ -133,7 +145,7 @@ void TTLTransform::finalize()
IProcessor::Status TTLTransform::prepare()
{
- auto status = ISimpleTransform::prepare();
+ auto status = IAccumulatingTransform::prepare();
if (status == Status::Finished)
finalize();
diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h
index 986181df652..50b28e81bdf 100644
--- a/src/DataStreams/TTLBlockInputStream.h
+++ b/src/DataStreams/TTLBlockInputStream.h
@@ -1,5 +1,5 @@
#pragma once
-#include
+#include
#include
#include
#include
@@ -12,7 +12,7 @@
namespace DB
{
-class TTLTransform : public ISimpleTransform
+class TTLTransform : public IAccumulatingTransform
{
public:
TTLTransform(
@@ -29,7 +29,8 @@ public:
Status prepare() override;
protected:
- void transform(Chunk & chunk) override;
+ void consume(Chunk chunk) override;
+ Chunk generate() override;
/// Finalizes ttl infos and updates data part
void finalize();
diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/DataStreams/TTLCalcInputStream.cpp
index c156b31428a..fe85e40c003 100644
--- a/src/DataStreams/TTLCalcInputStream.cpp
+++ b/src/DataStreams/TTLCalcInputStream.cpp
@@ -11,9 +11,9 @@ TTLCalcTransform::TTLCalcTransform(
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_,
bool force_)
- : ISimpleTransform(header_, header_, true)
+ : IAccumulatingTransform(header_, header_)
, data_part(data_part_)
- , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcInputStream)"))
+ , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcTransform)"))
{
auto old_ttl_infos = data_part->ttl_infos;
@@ -50,7 +50,7 @@ TTLCalcTransform::TTLCalcTransform(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
-void TTLCalcTransform::transform(Chunk & chunk)
+void TTLCalcTransform::consume(Chunk chunk)
{
auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
for (const auto & algorithm : algorithms)
@@ -63,7 +63,23 @@ void TTLCalcTransform::transform(Chunk & chunk)
for (const auto & col : getOutputPort().getHeader())
res.addColumn(block.getByName(col.name).column);
- chunk = std::move(res);
+ setReadyChunk(std::move(res));
+}
+
+Chunk TTLCalcTransform::generate()
+{
+ Block block;
+ for (const auto & algorithm : algorithms)
+ algorithm->execute(block);
+
+ if (!block)
+ return {};
+
+ Chunk res;
+ for (const auto & col : getOutputPort().getHeader())
+ res.addColumn(block.getByName(col.name).column);
+
+ return res;
}
void TTLCalcTransform::finalize()
@@ -75,7 +91,7 @@ void TTLCalcTransform::finalize()
IProcessor::Status TTLCalcTransform::prepare()
{
- auto status = ISimpleTransform::prepare();
+ auto status = IAccumulatingTransform::prepare();
if (status == Status::Finished)
finalize();
diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/DataStreams/TTLCalcInputStream.h
index d0e7b0055f2..b6318026b8c 100644
--- a/src/DataStreams/TTLCalcInputStream.h
+++ b/src/DataStreams/TTLCalcInputStream.h
@@ -1,5 +1,5 @@
#pragma once
-#include
+#include
#include
#include
#include
@@ -11,7 +11,7 @@
namespace DB
{
-class TTLCalcTransform : public ISimpleTransform
+class TTLCalcTransform : public IAccumulatingTransform
{
public:
TTLCalcTransform(
@@ -27,7 +27,8 @@ public:
Status prepare() override;
protected:
- void transform(Chunk & chunk) override;
+ void consume(Chunk chunk) override;
+ Chunk generate() override;
/// Finalizes ttl infos and updates data part
void finalize();
From ee347fc4160c36c0505fbf79bdab8e1f19dce4f4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:02:38 +0300
Subject: [PATCH 053/104] Update changelog to include caveats about Nullable
types in data skipping indexes
---
CHANGELOG.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index babb5ebca8d..d203fcba4c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -110,6 +110,7 @@
* Fix the issue that in case of some sophisticated query with column aliases identical to the names of expressions, bad cast may happen. This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914). This fix may introduce backward incompatibility: if there are different expressions with identical names, exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Now, scalar subquery always returns `Nullable` result if it's type can be `Nullable`. It is needed because in case of empty subquery it's result should be `Null`. Previously, it was possible to get error about incompatible types (type deduction does not execute scalar subquery, and it could use not-nullable type). Scalar subquery with empty result which can't be converted to `Nullable` (like `Array` or `Tuple`) now throws error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Introduce syntax for here documents. Example `SELECT $doc$ VALUE $doc$`. [#26671](https://github.com/ClickHouse/ClickHouse/pull/26671) ([Maksim Kita](https://github.com/kitaisreal)). This change is backward incompatible if in query there are identifiers that contain `$` [#28768](https://github.com/ClickHouse/ClickHouse/issues/28768).
+* Now indices can handle Nullable types, including `isNull` and `isNotNull`. [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) and [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)) and [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). But this was done with on-disk format changes, and even though new server can read old data, old server cannot. Also, in case you have `MINMAX` data skipping indices, you may get `Data after mutation/merge is not byte-identical` error, since new index will have `.idx2` extension while before it was `.idx`. This means you should not delay updating all existing replicas in this case; otherwise, if an old replica (<21.9) downloads data from a new replica (21.9+), it will not be able to apply the index for the downloaded part.
#### New Feature
@@ -179,7 +180,6 @@
* Add setting `log_formatted_queries` to log additional formatted query into `system.query_log`. It's useful for normalized query analysis because functions like `normalizeQuery` and `normalizeQueryKeepNames` don't parse/format queries in order to achieve better performance. [#27380](https://github.com/ClickHouse/ClickHouse/pull/27380) ([Amos Bird](https://github.com/amosbird)).
* Add two settings `max_hyperscan_regexp_length` and `max_hyperscan_regexp_total_length` to prevent huge regexp being used in hyperscan related functions, such as `multiMatchAny`. [#27378](https://github.com/ClickHouse/ClickHouse/pull/27378) ([Amos Bird](https://github.com/amosbird)).
* Memory consumed by bitmap aggregate functions now is taken into account for memory limits. This closes [#26555](https://github.com/ClickHouse/ClickHouse/issues/26555). [#27252](https://github.com/ClickHouse/ClickHouse/pull/27252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add new index data skipping minmax index format for proper Nullable support. [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)).
* Add 10 seconds cache for S3 proxy resolver. [#27216](https://github.com/ClickHouse/ClickHouse/pull/27216) ([ianton-ru](https://github.com/ianton-ru)).
* Split global mutex into individual regexp construction. This helps avoid huge regexp construction blocking other related threads. [#27211](https://github.com/ClickHouse/ClickHouse/pull/27211) ([Amos Bird](https://github.com/amosbird)).
* Support schema for PostgreSQL database engine. Closes [#27166](https://github.com/ClickHouse/ClickHouse/issues/27166). [#27198](https://github.com/ClickHouse/ClickHouse/pull/27198) ([Kseniia Sumarokova](https://github.com/kssenii)).
@@ -234,7 +234,6 @@
* Fix multiple block insertion into distributed table with `insert_distributed_one_random_shard = 1`. This is a marginal feature. Mark as improvement. [#23140](https://github.com/ClickHouse/ClickHouse/pull/23140) ([Amos Bird](https://github.com/amosbird)).
* Support `LowCardinality` and `FixedString` keys/values for `Map` type. [#21543](https://github.com/ClickHouse/ClickHouse/pull/21543) ([hexiaoting](https://github.com/hexiaoting)).
* Enable reloading of local disk config. [#19526](https://github.com/ClickHouse/ClickHouse/pull/19526) ([taiyang-li](https://github.com/taiyang-li)).
-* Now KeyConditions can correctly skip nullable keys, including `isNull` and `isNotNull`. https://github.com/ClickHouse/ClickHouse/pull/12433. [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)).
#### Bug Fix
From 1b8e3b6b962df0c0bf26fbdf86cefbc2e12ebaa0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:10:44 +0300
Subject: [PATCH 054/104] Fix system tables recreation check (fails to detect
changes in enum values)
Fixes: #23934 (cc @kitaisreal)
---
src/Interpreters/SystemLog.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 615180d27dd..ce0062e8c77 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -527,7 +527,7 @@ void SystemLog::prepareTable()
auto alias_columns = LogElement::getNamesAndAliases();
auto current_query = InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns);
- if (old_query->getTreeHash() != current_query->getTreeHash())
+ if (serializeAST(*old_query) != serializeAST(*current_query))
{
/// Rename the existing table.
int suffix = 0;
From bf88f102fe7e0cd94b80ab29dbf2ebaebd9645bb Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:13:56 +0300
Subject: [PATCH 055/104] Add ability to trace peak memory usage (with new
trace_type - MemoryPeak)
---
src/Common/MemoryTracker.cpp | 19 ++++++++++++++++---
src/Common/MemoryTracker.h | 2 +-
src/Common/TraceCollector.h | 3 ++-
src/Interpreters/TraceLog.cpp | 1 +
.../01092_memory_profiler.reference | 1 +
.../0_stateless/01092_memory_profiler.sql | 1 +
6 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 205771a5f6c..013005442be 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -200,11 +200,13 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
}
+ bool allocation_traced = false;
if (unlikely(current_profiler_limit && will_be > current_profiler_limit))
{
BlockerInThread untrack_lock(VariableContext::Global);
DB::TraceCollector::collect(DB::TraceType::Memory, StackTrace(), size);
setOrRaiseProfilerLimit((will_be + profiler_step - 1) / profiler_step * profiler_step);
+ allocation_traced = true;
}
std::bernoulli_distribution sample(sample_probability);
@@ -212,6 +214,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
{
BlockerInThread untrack_lock(VariableContext::Global);
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size);
+ allocation_traced = true;
}
if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
@@ -230,17 +233,24 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
formatReadableSizeWithBinarySuffix(current_hard_limit));
}
+ bool peak_updated;
if (throw_if_memory_exceeded)
{
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
BlockerInThread untrack_lock(VariableContext::Global);
bool log_memory_usage = true;
- updatePeak(will_be, log_memory_usage);
+ peak_updated = updatePeak(will_be, log_memory_usage);
}
else
{
bool log_memory_usage = false;
- updatePeak(will_be, log_memory_usage);
+ peak_updated = updatePeak(will_be, log_memory_usage);
+ }
+
+ if (peak_updated && allocation_traced)
+ {
+ BlockerInThread untrack_lock(VariableContext::Global);
+ DB::TraceCollector::collect(DB::TraceType::MemoryPeak, StackTrace(), will_be);
}
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
@@ -259,7 +269,7 @@ void MemoryTracker::allocNoThrow(Int64 size)
allocImpl(size, throw_if_memory_exceeded);
}
-void MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
+bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
{
auto peak_old = peak.load(std::memory_order_relaxed);
if (will_be > peak_old) /// Races doesn't matter. Could rewrite with CAS, but not worth.
@@ -269,7 +279,10 @@ void MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
if (log_memory_usage && (level == VariableContext::Process || level == VariableContext::Global)
&& will_be / log_peak_memory_usage_every > peak_old / log_peak_memory_usage_every)
logMemoryUsage(will_be);
+
+ return true;
}
+ return false;
}
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 36560fec334..af5cdd1e41b 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -58,7 +58,7 @@ private:
/// This description will be used as prefix into log messages (if isn't nullptr)
std::atomic<const char *> description_ptr = nullptr;
- void updatePeak(Int64 will_be, bool log_memory_usage);
+ bool updatePeak(Int64 will_be, bool log_memory_usage);
void logMemoryUsage(Int64 current) const;
public:
diff --git a/src/Common/TraceCollector.h b/src/Common/TraceCollector.h
index 86e9d659d0a..d3bbc74726e 100644
--- a/src/Common/TraceCollector.h
+++ b/src/Common/TraceCollector.h
@@ -20,7 +20,8 @@ enum class TraceType : uint8_t
Real,
CPU,
Memory,
- MemorySample
+ MemorySample,
+ MemoryPeak,
};
class TraceCollector
diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp
index dac27aebe58..c16a73e75dc 100644
--- a/src/Interpreters/TraceLog.cpp
+++ b/src/Interpreters/TraceLog.cpp
@@ -19,6 +19,7 @@ const TraceDataType::Values TraceLogElement::trace_values =
{"CPU", static_cast<UInt8>(TraceType::CPU)},
{"Memory", static_cast<UInt8>(TraceType::Memory)},
{"MemorySample", static_cast<UInt8>(TraceType::MemorySample)},
+ {"MemoryPeak", static_cast<UInt8>(TraceType::MemoryPeak)},
};
NamesAndTypesList TraceLogElement::getNamesAndTypes()
diff --git a/tests/queries/0_stateless/01092_memory_profiler.reference b/tests/queries/0_stateless/01092_memory_profiler.reference
index 0d66ea1aee9..986394f7c0f 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.reference
+++ b/tests/queries/0_stateless/01092_memory_profiler.reference
@@ -1,2 +1,3 @@
0
1
+1
diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql
index 0988f56413e..658877a1dee 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.sql
+++ b/tests/queries/0_stateless/01092_memory_profiler.sql
@@ -6,3 +6,4 @@ SET memory_profiler_step = 1000000;
SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000);
SYSTEM FLUSH LOGS;
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
+WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemoryPeak' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
From 0c0427dc45e0e30b47ac65fd376e14f6f8d0894d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:13:56 +0300
Subject: [PATCH 056/104] Cover memory_profiler_sample_probability in
01092_memory_profiler
---
tests/queries/0_stateless/01092_memory_profiler.reference | 1 +
tests/queries/0_stateless/01092_memory_profiler.sql | 3 +++
2 files changed, 4 insertions(+)
diff --git a/tests/queries/0_stateless/01092_memory_profiler.reference b/tests/queries/0_stateless/01092_memory_profiler.reference
index 986394f7c0f..74240c4b196 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.reference
+++ b/tests/queries/0_stateless/01092_memory_profiler.reference
@@ -1,3 +1,4 @@
0
1
1
+1
diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql
index 658877a1dee..b997550e093 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.sql
+++ b/tests/queries/0_stateless/01092_memory_profiler.sql
@@ -3,7 +3,10 @@
SET allow_introspection_functions = 1;
SET memory_profiler_step = 1000000;
+SET memory_profiler_sample_probability = 1;
+
SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000);
SYSTEM FLUSH LOGS;
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemoryPeak' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
+WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemorySample' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
From a171dfd75ec3f27e630696b46ee30f77bca66eb6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:13:56 +0300
Subject: [PATCH 057/104] Make 01092_memory_profiler more robust for dev env
---
tests/queries/0_stateless/01092_memory_profiler.sql | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql
index b997550e093..0db88bb14b3 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.sql
+++ b/tests/queries/0_stateless/01092_memory_profiler.sql
@@ -4,6 +4,7 @@ SET allow_introspection_functions = 1;
SET memory_profiler_step = 1000000;
SET memory_profiler_sample_probability = 1;
+SET log_queries = 1;
SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000);
SYSTEM FLUSH LOGS;
From 90cc63aecd37ffe7a3f6497b462be55540bc70a5 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 7 Oct 2021 22:05:51 +0300
Subject: [PATCH 058/104] fix tests
---
tests/integration/helpers/cluster.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 5ac4dbc8ad0..5ba67085d73 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -2357,17 +2357,20 @@ class ClickHouseInstance:
dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries'))
os.mkdir(dictionaries_dir)
- def write_embedded_config(name, dest_dir):
+ def write_embedded_config(name, dest_dir, fix_log_level=False):
with open(p.join(HELPERS_DIR, name), 'r') as f:
data = f.read()
data = data.replace('yandex', self.config_root_name)
+ if fix_log_level:
+ data = data.replace('test', 'trace')
with open(p.join(dest_dir, name), 'w') as r:
r.write(data)
logging.debug("Copy common configuration from helpers")
# The file is named with 0_ prefix to be processed before other configuration overloads.
if self.copy_common_configs:
- write_embedded_config('0_common_instance_config.xml', self.config_d_dir)
+ need_fix_log_level = self.tag != 'latest'
+ write_embedded_config('0_common_instance_config.xml', self.config_d_dir, need_fix_log_level)
write_embedded_config('0_common_instance_users.xml', users_d_dir)
From c734ada95ba7d5f13591acd32edb38f716784f64 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 7 Oct 2021 20:26:58 +0000
Subject: [PATCH 059/104] Fix
---
src/Client/ClientBase.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index cde5a5f9977..b68df11fd60 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -426,10 +426,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
catch (Exception & e)
{
if (!is_interactive)
- {
e.addMessage("(in query: {})", full_query);
- throw;
- }
+ throw;
}
if (have_error)
From e07a6f3fc0ea0b496483287d85b50d29f5a8c330 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:09:35 +0300
Subject: [PATCH 060/104] docker: add pandas/clickhouse_driver into test images
---
docker/test/fasttest/Dockerfile | 2 +-
docker/test/fuzzer/Dockerfile | 2 +-
docker/test/stateless/Dockerfile | 2 +-
docker/test/style/Dockerfile | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 798910fb952..f50c65bb9f2 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update \
unixodbc \
--yes --no-install-recommends
-RUN pip3 install numpy scipy pandas Jinja2
+RUN pip3 install numpy scipy pandas Jinja2 clickhouse_driver
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile
index 6444e745c47..13353bc2960 100644
--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@@ -27,7 +27,7 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
-RUN pip3 install Jinja2
+RUN pip3 install Jinja2 pandas clickhouse_driver
COPY * /
diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index 7de8c061673..a5733d11dd2 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -34,7 +34,7 @@ RUN apt-get update -y \
postgresql-client \
sqlite3
-RUN pip3 install numpy scipy pandas Jinja2
+RUN pip3 install numpy scipy pandas Jinja2 clickhouse_driver
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 33cdb9db57a..64cc0c9c7b7 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
pylint \
yamllint \
- && pip3 install codespell
+ && pip3 install codespell pandas clickhouse_driver
COPY run.sh /
COPY process_style_check_result.py /
From 9dd0fca1edd383c00667ce4c1a953e4f6d2bca1a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 23:45:18 +0300
Subject: [PATCH 061/104] Suppress some existing warnings in clickhouse-test
(will be fixed separately)
---
tests/clickhouse-test | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 19080f3934f..2c8093190ea 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -1,6 +1,12 @@
#!/usr/bin/env python3
# pylint: disable=too-many-return-statements
+# pylint: disable=consider-using-f-string
+# pylint: disable=global-variable-not-assigned
+# pylint: disable=consider-using-with
+# pylint: disable=unspecified-encoding
+# pylint: disable=consider-using-min-builtin
+
import enum
import shutil
import sys
From df129d7efc70eb2abc394b72a0dd64c421de8549 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 7 Oct 2021 21:05:42 +0300
Subject: [PATCH 062/104] Rewrite clickhouse-test to use python
clickhouse_driver
Pros:
- Using native protocol over executing binaries is always better
- `clickhouse-client` in debug build takes almost a second to execute simple `SELECT 1`
and `clickhouse-test` requires ~5 queries at start (determine some
flags, zk, alive, create database)
Notes:
- `FORMAT Vertical` had been replaced with printing of `pandas.DataFrame`
And after this patch tiny tests work with the speed of the test, and
do not require +-5 seconds of bootstrapping.
---
tests/clickhouse-test | 424 +++++++++++++++++++-----------------------
1 file changed, 193 insertions(+), 231 deletions(-)
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 2c8093190ea..e8c85a6ae79 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -19,13 +19,10 @@ import traceback
import math
from argparse import ArgumentParser
-from typing import Tuple, Union, Optional, TextIO, Dict, Set, List
-import shlex
+from typing import Tuple, Union, Optional, Dict, Set, List
import subprocess
from subprocess import Popen
from subprocess import PIPE
-from subprocess import CalledProcessError
-from subprocess import TimeoutExpired
from datetime import datetime
from time import time, sleep
from errno import ESRCH
@@ -41,6 +38,9 @@ import multiprocessing
import socket
from contextlib import closing
+import clickhouse_driver
+import pandas
+
USE_JINJA = True
try:
import jinja2
@@ -48,20 +48,45 @@ except ImportError:
USE_JINJA = False
print('WARNING: jinja2 not installed! Template tests will be skipped.')
-DISTRIBUTED_DDL_TIMEOUT_MSG = "is executing longer than distributed_ddl_task_timeout"
-
MESSAGES_TO_RETRY = [
"ConnectionPoolWithFailover: Connection failed at try",
"DB::Exception: New table appeared in database being dropped or detached. Try again",
"is already started to be removing by another replica right now",
"DB::Exception: Cannot enqueue query",
- DISTRIBUTED_DDL_TIMEOUT_MSG # FIXME
+ "is executing longer than distributed_ddl_task_timeout" # FIXME
+]
+error_codes = clickhouse_driver.errors.ErrorCodes
+error_codes.NOT_A_LEADER = 529
+ERROR_CODES_TO_RETRY = [
+ error_codes.ALL_CONNECTION_TRIES_FAILED,
+ error_codes.DATABASE_NOT_EMPTY,
+ error_codes.NOT_A_LEADER,
+ error_codes.UNFINISHED,
]
MAX_RETRIES = 3
TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect']
+class Client(clickhouse_driver.Client):
+ # return first column of the first row
+ def execute_one(self, *args, **kwargs):
+ return super().execute(*args, **kwargs)[0][0]
+
+ # return pandas.DataFrame
+ def execute_pandas(self, *args, **kwargs):
+ data = super().execute(*args, **kwargs, with_column_types=True)
+ return Client.__combine(data)
+
+ @staticmethod
+ def __combine(data):
+ cols = data[1]
+ rows = data[0]
+ header = [ i[0] for i in cols ]
+ data = pandas.DataFrame(data=rows, columns=header)
+ return data
+
+
class Terminated(KeyboardInterrupt):
pass
@@ -103,18 +128,16 @@ def get_db_engine(args, database_name):
def get_zookeeper_session_uptime(args):
+ global clickhouse_client
+
try:
- query = b"SELECT zookeeperSessionUptime()"
-
if args.replicated_database:
- query = b"SELECT min(materialize(zookeeperSessionUptime())) " \
- b"FROM clusterAllReplicas('test_cluster_database_replicated', system.one) "
-
- clickhouse_proc = open_client_process(args.client)
-
- (stdout, _) = clickhouse_proc.communicate((query), timeout=20)
-
- return int(stdout.decode('utf-8').strip())
+ return int(clickhouse_client.execute_one("""
+ SELECT min(materialize(zookeeperSessionUptime()))
+ FROM clusterAllReplicas('test_cluster_database_replicated', system.one)
+ """))
+ else:
+ return int(clickhouse_client.execute_one('SELECT zookeeperSessionUptime()'))
except:
return None
@@ -128,24 +151,31 @@ def need_retry(args, stdout, stderr, total_time):
return True
return any(msg in stdout for msg in MESSAGES_TO_RETRY) or any(msg in stderr for msg in MESSAGES_TO_RETRY)
+def need_retry_error(args, error, total_time):
+ # Sometimes we may get unexpected exception like "Replica is readonly" or "Shutdown is called for table"
+ # instead of "Session expired" or "Connection loss"
+ # Retry if session was expired during test execution
+ session_uptime = get_zookeeper_session_uptime(args)
+ if session_uptime is not None and session_uptime < math.ceil(total_time):
+ return True
+ if isinstance(error, clickhouse_driver.errors.Error):
+ if error.code in ERROR_CODES_TO_RETRY:
+ return True
+ if any(msg in error.message for msg in MESSAGES_TO_RETRY):
+ return True
+ return False
+
def get_processlist(args):
- try:
- query = b"SHOW PROCESSLIST FORMAT Vertical"
-
- if args.replicated_database:
- query = b"SELECT materialize((hostName(), tcpPort())) as host, * " \
- b"FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) " \
- b"WHERE query NOT LIKE '%system.processes%' FORMAT Vertical"
-
- clickhouse_proc = open_client_process(args.client)
-
- (stdout, _) = clickhouse_proc.communicate((query), timeout=20)
-
- return False, stdout.decode('utf-8')
- except Exception as ex:
- print("Exception", ex)
- return True, ""
+ global clickhouse_client
+ if args.replicated_database:
+ return clickhouse_client.execute_pandas("""
+ SELECT materialize((hostName(), tcpPort())) as host, *
+ FROM clusterAllReplicas('test_cluster_database_replicated', system.processes)
+ WHERE query NOT LIKE '%system.processes%'
+ """)
+ else:
+ return clickhouse_client.execute_pandas('SHOW PROCESSLIST')
# collect server stacktraces using gdb
@@ -311,7 +341,8 @@ class TestCase:
return None
@staticmethod
- def configure_testcase_args(args, case_file, suite_tmp_dir, stderr_file):
+ def configure_testcase_args(args, case_file, suite_tmp_dir):
+ global clickhouse_client
testcase_args = copy.deepcopy(args)
testcase_args.testcase_start_time = datetime.now()
@@ -331,23 +362,11 @@ class TestCase:
database = 'test_{suffix}'.format(suffix=random_str())
- with open(stderr_file, 'w') as stderr:
- client_cmd = testcase_args.testcase_client + " " \
- + get_additional_client_options(args)
-
- clickhouse_proc_create = open_client_process(
- universal_newlines=True,
- client_args=client_cmd,
- stderr_file=stderr)
-
- try:
- clickhouse_proc_create.communicate(
- ("CREATE DATABASE " + database + get_db_engine(testcase_args, database)),
- timeout=testcase_args.timeout)
- except TimeoutExpired:
- total_time = (datetime.now() - testcase_args.testcase_start_time).total_seconds()
- return clickhouse_proc_create, "", "Timeout creating database {} before test".format(
- database), total_time
+ try:
+ clickhouse_client.execute("CREATE DATABASE " + database + get_db_engine(testcase_args, database), settings={'log_comment': testcase_basename})
+ except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError):
+ total_time = (datetime.now() - testcase_args.testcase_start_time).total_seconds()
+ return None, "", f"Timeout creating database {database} before test", total_time
os.environ["CLICKHOUSE_DATABASE"] = database
# Set temporary directory to match the randomly generated database,
@@ -418,41 +437,42 @@ class TestCase:
def process_result_impl(self, proc, stdout: str, stderr: str, total_time: float):
description = ""
- if proc.returncode is None:
- try:
- proc.kill()
- except OSError as e:
- if e.errno != ESRCH:
- raise
+ if proc:
+ if proc.returncode is None:
+ try:
+ proc.kill()
+ except OSError as e:
+ if e.errno != ESRCH:
+ raise
- if stderr:
- description += stderr
- return TestResult(self.name, TestStatus.FAIL, FailureReason.TIMEOUT, total_time, description)
+ if stderr:
+ description += stderr
+ return TestResult(self.name, TestStatus.FAIL, FailureReason.TIMEOUT, total_time, description)
- if proc.returncode != 0:
- reason = FailureReason.EXIT_CODE
- description += str(proc.returncode)
+ if proc.returncode != 0:
+ reason = FailureReason.EXIT_CODE
+ description += str(proc.returncode)
- if stderr:
- description += "\n"
- description += stderr
+ if stderr:
+ description += "\n"
+ description += stderr
- # Stop on fatal errors like segmentation fault. They are sent to client via logs.
- if ' ' in stderr:
- reason = FailureReason.SERVER_DIED
+ # Stop on fatal errors like segmentation fault. They are sent to client via logs.
+ if ' ' in stderr:
+ reason = FailureReason.SERVER_DIED
- if self.testcase_args.stop \
- and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) \
- and 'Received exception from server' not in stderr:
- reason = FailureReason.SERVER_DIED
+ if self.testcase_args.stop \
+ and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) \
+ and 'Received exception from server' not in stderr:
+ reason = FailureReason.SERVER_DIED
- if os.path.isfile(self.stdout_file):
- description += ", result:\n\n"
- description += '\n'.join(open(self.stdout_file).read().split('\n')[:100])
- description += '\n'
+ if os.path.isfile(self.stdout_file):
+ description += ", result:\n\n"
+ description += '\n'.join(open(self.stdout_file).read().split('\n')[:100])
+ description += '\n'
- description += "\nstdout:\n{}\n".format(stdout)
- return TestResult(self.name, TestStatus.FAIL, reason, total_time, description)
+ description += "\nstdout:\n{}\n".format(stdout)
+ return TestResult(self.name, TestStatus.FAIL, reason, total_time, description)
if stderr:
description += "\n{}\n".format('\n'.join(stderr.split('\n')[:100]))
@@ -516,21 +536,12 @@ class TestCase:
@staticmethod
def send_test_name_failed(suite: str, case: str) -> bool:
- clickhouse_proc = open_client_process(args.client, universal_newlines=True)
-
- failed_to_check = False
-
+ global clickhouse_client
pid = os.getpid()
- query = f"SELECT 'Running test {suite}/{case} from pid={pid}';"
-
- try:
- clickhouse_proc.communicate((query), timeout=20)
- except:
- failed_to_check = True
-
- return failed_to_check or clickhouse_proc.returncode != 0
+ clickhouse_client.execute(f"SELECT 'Running test {suite}/{case} from pid={pid}'")
def run_single_test(self, server_logs_level, client_options):
+ global clickhouse_client
args = self.testcase_args
client = args.testcase_client
start_time = args.testcase_start_time
@@ -572,28 +583,13 @@ class TestCase:
need_drop_database = not maybe_passed
if need_drop_database:
- with open(self.stderr_file, 'a') as stderr:
- clickhouse_proc_create = open_client_process(client, universal_newlines=True, stderr_file=stderr)
-
seconds_left = max(args.timeout - (datetime.now() - start_time).total_seconds(), 20)
-
try:
- drop_database_query = "DROP DATABASE " + database
- if args.replicated_database:
- drop_database_query += " ON CLUSTER test_cluster_database_replicated"
- clickhouse_proc_create.communicate((drop_database_query), timeout=seconds_left)
- except TimeoutExpired:
- # kill test process because it can also hung
- if proc.returncode is None:
- try:
- proc.kill()
- except OSError as e:
- if e.errno != ESRCH:
- raise
-
+ with clickhouse_client.connection.timeout_setter(seconds_left):
+ clickhouse_client.execute("DROP DATABASE " + database)
+ except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError):
total_time = (datetime.now() - start_time).total_seconds()
- return clickhouse_proc_create, "", f"Timeout dropping database {database} after test", total_time
-
+ return None, "", f"Timeout dropping database {database} after test", total_time
shutil.rmtree(args.test_tmp_dir)
total_time = (datetime.now() - start_time).total_seconds()
@@ -624,12 +620,15 @@ class TestCase:
if skip_reason is not None:
return TestResult(self.name, TestStatus.SKIPPED, skip_reason, 0., "")
- if args.testname and self.send_test_name_failed(suite.suite, self.case):
- description = "\nServer does not respond to health check\n"
- return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., description)
+ if args.testname:
+ try:
+ self.send_test_name_failed(suite.suite, self.case)
+ except:
+ return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0.,
+ "\nServer does not respond to health check\n")
self.runs_count += 1
- self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path, self.stderr_file)
+ self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path)
proc, stdout, stderr, total_time = self.run_single_test(server_logs_level, client_options)
result = self.process_result_impl(proc, stdout, stderr, total_time)
@@ -794,12 +793,8 @@ class TestSuite:
@staticmethod
def readTestSuite(args, suite_dir_name: str):
def is_data_present():
- clickhouse_proc = open_client_process(args.client)
- (stdout, stderr) = clickhouse_proc.communicate(b"EXISTS TABLE test.hits")
- if clickhouse_proc.returncode != 0:
- raise CalledProcessError(clickhouse_proc.returncode, args.client, stderr)
-
- return stdout.startswith(b'1')
+ global clickhouse_client
+ return int(clickhouse_client.execute_one('EXISTS TABLE test.hits'))
base_dir = os.path.abspath(args.queries)
tmp_dir = os.path.abspath(args.tmp)
@@ -832,6 +827,7 @@ class TestSuite:
stop_time = None
+clickhouse_client = None
exit_code = None
server_died = None
stop_tests_triggered_lock = None
@@ -961,42 +957,26 @@ def run_tests_array(all_tests_with_params):
server_logs_level = "warning"
-def check_server_started(client, retry_count):
+def check_server_started(retry_count):
+ global clickhouse_client
print("Connecting to ClickHouse server...", end='')
sys.stdout.flush()
-
while retry_count > 0:
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT 1")
-
- if clickhouse_proc.returncode == 0 and stdout.startswith(b"1"):
+ try:
+ clickhouse_client.execute('SELECT 1')
print(" OK")
sys.stdout.flush()
return True
-
- if clickhouse_proc.returncode == 210:
- # Connection refused, retry
+ except (ConnectionRefusedError, ConnectionResetError, clickhouse_driver.errors.NetworkError):
print('.', end='')
sys.stdout.flush()
retry_count -= 1
sleep(0.5)
continue
- code: int = clickhouse_proc.returncode
-
- print(f"\nClient invocation failed with code {code}:\n\
- stdout: {stdout}\n\
- stderr: {stderr}\n\
- args: {''.join(clickhouse_proc.args)}\n")
-
- sys.stdout.flush()
-
- return False
-
print('\nAll connection tries failed')
sys.stdout.flush()
-
return False
@@ -1012,60 +992,38 @@ class BuildFlags():
POLYMORPHIC_PARTS = 'polymorphic-parts'
-def collect_build_flags(client):
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'")
+def collect_build_flags():
+ global clickhouse_client
+
result = []
- if clickhouse_proc.returncode == 0:
- if b'-fsanitize=thread' in stdout:
- result.append(BuildFlags.THREAD)
- elif b'-fsanitize=address' in stdout:
- result.append(BuildFlags.ADDRESS)
- elif b'-fsanitize=undefined' in stdout:
- result.append(BuildFlags.UNDEFINED)
- elif b'-fsanitize=memory' in stdout:
- result.append(BuildFlags.MEMORY)
- else:
- raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+ value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'")
+ if '-fsanitize=thread' in value:
+ result.append(BuildFlags.THREAD)
+ elif '-fsanitize=address' in value:
+ result.append(BuildFlags.ADDRESS)
+ elif '-fsanitize=undefined' in value:
+ result.append(BuildFlags.UNDEFINED)
+ elif '-fsanitize=memory' in value:
+ result.append(BuildFlags.MEMORY)
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
+ value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
+ if 'Debug' in value:
+ result.append(BuildFlags.DEBUG)
+ elif 'RelWithDebInfo' in value or 'Release' in value:
+ result.append(BuildFlags.RELEASE)
- if clickhouse_proc.returncode == 0:
- if b'Debug' in stdout:
- result.append(BuildFlags.DEBUG)
- elif b'RelWithDebInfo' in stdout or b'Release' in stdout:
- result.append(BuildFlags.RELEASE)
- else:
- raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+ value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
+ if value in ('ON', '1'):
+ result.append(BuildFlags.UNBUNDLED)
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
+ value = clickhouse_client.execute_one("SELECT value FROM system.settings WHERE name = 'default_database_engine'")
+ if value == 'Ordinary':
+ result.append(BuildFlags.ORDINARY_DATABASE)
- if clickhouse_proc.returncode == 0:
- if b'ON' in stdout or b'1' in stdout:
- result.append(BuildFlags.UNBUNDLED)
- else:
- raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
-
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.settings WHERE name = 'default_database_engine'")
-
- if clickhouse_proc.returncode == 0:
- if b'Ordinary' in stdout:
- result.append(BuildFlags.ORDINARY_DATABASE)
- else:
- raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
-
- clickhouse_proc = open_client_process(client)
- (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'")
-
- if clickhouse_proc.returncode == 0:
- if stdout == b'0\n':
- result.append(BuildFlags.POLYMORPHIC_PARTS)
- else:
- raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+ value = int(clickhouse_client.execute_one("SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'"))
+ if value == 0:
+ result.append(BuildFlags.POLYMORPHIC_PARTS)
return result
@@ -1092,16 +1050,6 @@ def extract_key(key: str) -> str:
args.configserver + key)[1]
-def open_client_process(
- client_args: str,
- universal_newlines: bool = False,
- stderr_file: Optional[TextIO] = None):
- return Popen(
- shlex.split(client_args), stdin=PIPE, stdout=PIPE,
- stderr=stderr_file if stderr_file is not None else PIPE,
- universal_newlines=True if universal_newlines else None)
-
-
def do_run_tests(jobs, test_suite: TestSuite, parallel):
if jobs > 1 and len(test_suite.parallel_tests) > 0:
print("Found", len(test_suite.parallel_tests), "parallel tests and", len(test_suite.sequential_tests), "sequential tests")
@@ -1170,8 +1118,9 @@ def main(args):
global exit_code
global server_logs_level
global restarted_tests
+ global clickhouse_client
- if not check_server_started(args.client, args.server_check_retries):
+ if not check_server_started(args.server_check_retries):
msg = "Server is not responding. Cannot execute 'SELECT 1' query. \
If you are using split build, you have to specify -c option."
if args.hung_check:
@@ -1181,13 +1130,12 @@ def main(args):
print_stacktraces()
raise Exception(msg)
- args.build_flags = collect_build_flags(args.client)
+ args.build_flags = collect_build_flags()
if args.skip:
args.skip = set(args.skip)
base_dir = os.path.abspath(args.queries)
- tmp_dir = os.path.abspath(args.tmp)
# Keep same default values as in queries/shell_config.sh
os.environ.setdefault("CLICKHOUSE_BINARY", args.binary)
@@ -1218,17 +1166,12 @@ def main(args):
create_database_retries = 0
while create_database_retries < MAX_RETRIES:
start_time = datetime.now()
-
- client_cmd = args.client + " " + get_additional_client_options(args)
-
- clickhouse_proc_create = open_client_process(client_cmd, universal_newlines=True)
-
- (stdout, stderr) = clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name)))
-
- total_time = (datetime.now() - start_time).total_seconds()
-
- if not need_retry(args, stdout, stderr, total_time):
- break
+ try:
+ clickhouse_client.execute("CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name))
+ except Exception as e:
+ total_time = (datetime.now() - start_time).total_seconds()
+ if not need_retry_error(args, e, total_time):
+ break
create_database_retries += 1
if args.database and args.database != "test":
@@ -1255,18 +1198,14 @@ def main(args):
# Some queries may execute in background for some time after test was finished. This is normal.
for _ in range(1, 60):
- timeout, processlist = get_processlist(args)
- if timeout or not processlist:
+ processlist = get_processlist(args)
+ if processlist.empty:
break
sleep(1)
- if timeout or processlist:
- if processlist:
- print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
- print(processlist)
- else:
- print(colored("Seems like server hung and cannot respond to queries", args, "red", attrs=["bold"]))
-
+ if not processlist.empty:
+ print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
+ print(processlist)
print_stacktraces()
exit_code.value = 1
@@ -1311,16 +1250,20 @@ def find_binary(name):
def get_additional_client_options(args):
if args.client_option:
return ' '.join('--' + option for option in args.client_option)
-
return ''
-
def get_additional_client_options_url(args):
if args.client_option:
return '&'.join(args.client_option)
-
return ''
+def get_additional_client_options_dict(args):
+ settings = {}
+ if args.client_option:
+ for key, value in map(lambda x: x.split('='), args.client_option):
+ settings[key] = value
+ return settings
+
if __name__ == '__main__':
stop_time = None
@@ -1439,14 +1382,24 @@ if __name__ == '__main__':
if args.configclient:
args.client += ' --config-file=' + args.configclient
- if os.getenv("CLICKHOUSE_HOST"):
- args.client += ' --host=' + os.getenv("CLICKHOUSE_HOST")
+ tcp_host = os.getenv("CLICKHOUSE_HOST")
+ if tcp_host is not None:
+ args.client += f' --host={tcp_host}'
+ else:
+ tcp_host = 'localhost'
- args.tcp_port = int(os.getenv("CLICKHOUSE_PORT_TCP", "9000"))
- args.client += f" --port={args.tcp_port}"
+ tcp_port = os.getenv("CLICKHOUSE_PORT_TCP")
+ if tcp_port is not None:
+ args.tcp_port = int(tcp_port)
+ args.client += f" --port={tcp_port}"
+ else:
+ args.tcp_port = 9000
- if os.getenv("CLICKHOUSE_DATABASE"):
- args.client += ' --database=' + os.getenv("CLICKHOUSE_DATABASE")
+ client_database = os.getenv("CLICKHOUSE_DATABASE")
+ if client_database is not None:
+ args.client += f' --database={client_database}'
+ else:
+ client_database = 'default'
if args.client_option:
# Set options for client
@@ -1474,4 +1427,13 @@ if __name__ == '__main__':
if args.jobs is None:
args.jobs = multiprocessing.cpu_count()
+ # configure pandas to make it more like Vertical format
+ pandas.options.display.max_columns = None
+ pandas.options.display.width = None
+
+ clickhouse_client = Client(host=tcp_host,
+ port=args.tcp_port,
+ database=client_database,
+ settings=get_additional_client_options_dict(args))
+
main(args)
From e2d6698244d43979b3fe2478dfdcd8dc3a91a0fd Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 8 Oct 2021 00:07:05 +0300
Subject: [PATCH 063/104] clickhouse-test: do not use persistent connection for
simplicity (due to threads)
---
tests/clickhouse-test | 77 +++++++++++++++++++++----------------------
1 file changed, 38 insertions(+), 39 deletions(-)
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index e8c85a6ae79..6bbfa97ab66 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -86,6 +86,17 @@ class Client(clickhouse_driver.Client):
data = pandas.DataFrame(data=rows, columns=header)
return data
+# Helpers
+def make_clickhouse_client(base_args, *args, **kwargs):
+ return Client(host=base_args.tcp_host, port=base_args.tcp_port,
+ settings=get_additional_client_options_dict(base_args))
+def clickhouse_execute_one(base_args, *args, **kwargs):
+ return make_clickhouse_client(base_args).execute_one(*args, **kwargs)
+def clickhouse_execute(base_args, *args, **kwargs):
+ return make_clickhouse_client(base_args).execute(*args, **kwargs)
+def clickhouse_execute_pandas(base_args, *args, **kwargs):
+ return make_clickhouse_client(base_args).execute_pandas(*args, **kwargs)
+
class Terminated(KeyboardInterrupt):
pass
@@ -128,16 +139,14 @@ def get_db_engine(args, database_name):
def get_zookeeper_session_uptime(args):
- global clickhouse_client
-
try:
if args.replicated_database:
- return int(clickhouse_client.execute_one("""
+ return int(clickhouse_execute_one(args, """
SELECT min(materialize(zookeeperSessionUptime()))
FROM clusterAllReplicas('test_cluster_database_replicated', system.one)
"""))
else:
- return int(clickhouse_client.execute_one('SELECT zookeeperSessionUptime()'))
+ return int(clickhouse_execute_one(args, 'SELECT zookeeperSessionUptime()'))
except:
return None
@@ -167,15 +176,14 @@ def need_retry_error(args, error, total_time):
def get_processlist(args):
- global clickhouse_client
if args.replicated_database:
- return clickhouse_client.execute_pandas("""
+ return clickhouse_execute_pandas(args, """
SELECT materialize((hostName(), tcpPort())) as host, *
FROM clusterAllReplicas('test_cluster_database_replicated', system.processes)
WHERE query NOT LIKE '%system.processes%'
""")
else:
- return clickhouse_client.execute_pandas('SHOW PROCESSLIST')
+ return clickhouse_execute_pandas(args, 'SHOW PROCESSLIST')
# collect server stacktraces using gdb
@@ -342,7 +350,6 @@ class TestCase:
@staticmethod
def configure_testcase_args(args, case_file, suite_tmp_dir):
- global clickhouse_client
testcase_args = copy.deepcopy(args)
testcase_args.testcase_start_time = datetime.now()
@@ -363,7 +370,7 @@ class TestCase:
database = 'test_{suffix}'.format(suffix=random_str())
try:
- clickhouse_client.execute("CREATE DATABASE " + database + get_db_engine(testcase_args, database), settings={'log_comment': testcase_basename})
+ clickhouse_execute(args, "CREATE DATABASE " + database + get_db_engine(testcase_args, database), settings={'log_comment': testcase_basename})
except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError):
total_time = (datetime.now() - testcase_args.testcase_start_time).total_seconds()
return None, "", f"Timeout creating database {database} before test", total_time
@@ -536,12 +543,10 @@ class TestCase:
@staticmethod
def send_test_name_failed(suite: str, case: str) -> bool:
- global clickhouse_client
pid = os.getpid()
- clickhouse_client.execute(f"SELECT 'Running test {suite}/{case} from pid={pid}'")
+ clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'")
def run_single_test(self, server_logs_level, client_options):
- global clickhouse_client
args = self.testcase_args
client = args.testcase_client
start_time = args.testcase_start_time
@@ -585,8 +590,10 @@ class TestCase:
if need_drop_database:
seconds_left = max(args.timeout - (datetime.now() - start_time).total_seconds(), 20)
try:
- with clickhouse_client.connection.timeout_setter(seconds_left):
- clickhouse_client.execute("DROP DATABASE " + database)
+ client = make_clickhouse_client(args)
+ client.connection.force_connect()
+ with client.connection.timeout_setter(seconds_left):
+ client.execute("DROP DATABASE " + database)
except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError):
total_time = (datetime.now() - start_time).total_seconds()
return None, "", f"Timeout dropping database {database} after test", total_time
@@ -793,8 +800,7 @@ class TestSuite:
@staticmethod
def readTestSuite(args, suite_dir_name: str):
def is_data_present():
- global clickhouse_client
- return int(clickhouse_client.execute_one('EXISTS TABLE test.hits'))
+ return int(clickhouse_execute_one(args, 'EXISTS TABLE test.hits'))
base_dir = os.path.abspath(args.queries)
tmp_dir = os.path.abspath(args.tmp)
@@ -827,7 +833,6 @@ class TestSuite:
stop_time = None
-clickhouse_client = None
exit_code = None
server_died = None
stop_tests_triggered_lock = None
@@ -957,14 +962,14 @@ def run_tests_array(all_tests_with_params):
server_logs_level = "warning"
-def check_server_started(retry_count):
- global clickhouse_client
+def check_server_started(args):
print("Connecting to ClickHouse server...", end='')
sys.stdout.flush()
+ retry_count = args.server_check_retries
while retry_count > 0:
try:
- clickhouse_client.execute('SELECT 1')
+ clickhouse_execute(args, 'SELECT 1')
print(" OK")
sys.stdout.flush()
return True
@@ -992,12 +997,10 @@ class BuildFlags():
POLYMORPHIC_PARTS = 'polymorphic-parts'
-def collect_build_flags():
- global clickhouse_client
-
+def collect_build_flags(args):
result = []
- value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'")
+ value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'")
if '-fsanitize=thread' in value:
result.append(BuildFlags.THREAD)
elif '-fsanitize=address' in value:
@@ -1007,21 +1010,21 @@ def collect_build_flags():
elif '-fsanitize=memory' in value:
result.append(BuildFlags.MEMORY)
- value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
+ value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
if 'Debug' in value:
result.append(BuildFlags.DEBUG)
elif 'RelWithDebInfo' in value or 'Release' in value:
result.append(BuildFlags.RELEASE)
- value = clickhouse_client.execute_one("SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
+ value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
if value in ('ON', '1'):
result.append(BuildFlags.UNBUNDLED)
- value = clickhouse_client.execute_one("SELECT value FROM system.settings WHERE name = 'default_database_engine'")
+ value = clickhouse_execute_one(args, "SELECT value FROM system.settings WHERE name = 'default_database_engine'")
if value == 'Ordinary':
result.append(BuildFlags.ORDINARY_DATABASE)
- value = int(clickhouse_client.execute_one("SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'"))
+ value = int(clickhouse_execute_one(args, "SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'"))
if value == 0:
result.append(BuildFlags.POLYMORPHIC_PARTS)
@@ -1118,9 +1121,8 @@ def main(args):
global exit_code
global server_logs_level
global restarted_tests
- global clickhouse_client
- if not check_server_started(args.server_check_retries):
+ if not check_server_started(args):
msg = "Server is not responding. Cannot execute 'SELECT 1' query. \
If you are using split build, you have to specify -c option."
if args.hung_check:
@@ -1130,7 +1132,7 @@ def main(args):
print_stacktraces()
raise Exception(msg)
- args.build_flags = collect_build_flags()
+ args.build_flags = collect_build_flags(args)
if args.skip:
args.skip = set(args.skip)
@@ -1167,7 +1169,7 @@ def main(args):
while create_database_retries < MAX_RETRIES:
start_time = datetime.now()
try:
- clickhouse_client.execute("CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name))
+ clickhouse_execute(args, "CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name))
except Exception as e:
total_time = (datetime.now() - start_time).total_seconds()
if not need_retry_error(args, e, total_time):
@@ -1384,9 +1386,10 @@ if __name__ == '__main__':
tcp_host = os.getenv("CLICKHOUSE_HOST")
if tcp_host is not None:
+ args.tcp_host = tcp_host
args.client += f' --host={tcp_host}'
else:
- tcp_host = 'localhost'
+ args.tcp_host = 'localhost'
tcp_port = os.getenv("CLICKHOUSE_PORT_TCP")
if tcp_port is not None:
@@ -1398,8 +1401,9 @@ if __name__ == '__main__':
client_database = os.getenv("CLICKHOUSE_DATABASE")
if client_database is not None:
args.client += f' --database={client_database}'
+ args.client_database = client_database
else:
- client_database = 'default'
+ args.client_database = 'default'
if args.client_option:
# Set options for client
@@ -1431,9 +1435,4 @@ if __name__ == '__main__':
pandas.options.display.max_columns = None
pandas.options.display.width = None
- clickhouse_client = Client(host=tcp_host,
- port=args.tcp_port,
- database=client_database,
- settings=get_additional_client_options_dict(args))
-
main(args)
From 004b71d5464cbc608a9f82371f504ec305c4779a Mon Sep 17 00:00:00 2001
From: pawelsz-rb <76971683+pawelsz-rb@users.noreply.github.com>
Date: Thu, 7 Oct 2021 15:27:14 -0700
Subject: [PATCH 064/104] Update adopters.md
Added Rollbar to the adopters list.
---
docs/en/introduction/adopters.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 72ebe33292f..20bf9a10986 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -102,6 +102,7 @@ toc_title: Adopters
|