Merge branch 'master' into fix-debian

This commit is contained in:
Alexey Milovidov 2021-04-13 20:34:16 +03:00
commit afa138fa27
132 changed files with 2305 additions and 619 deletions

3
.gitmodules vendored
View File

@ -221,6 +221,9 @@
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse-Extras/NuRaft.git
[submodule "contrib/nanodbc"]
path = contrib/nanodbc
url = https://github.com/ClickHouse-Extras/nanodbc.git
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git

View File

@ -14,7 +14,8 @@
#### New Feature
* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)).
* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not every time and date functions are working for extended range of dates.
* Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)).
* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. it is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` return all children as an array if indexes. It is a inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` return all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
* Added `executable_pool` dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).

View File

@ -512,6 +512,7 @@ include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nanodbc.cmake)
include (cmake/find/nuraft.cmake)

35
cmake/find/nanodbc.cmake Normal file
View File

@ -0,0 +1,35 @@
option(ENABLE_NANODBC "Enalbe nanodbc" ${ENABLE_LIBRARIES})
if (NOT ENABLE_NANODBC)
set (USE_ODBC 0)
return()
endif()
if (NOT ENABLE_ODBC)
set (USE_NANODBC 0)
message (STATUS "Using nanodbc=${USE_NANODBC}")
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt")
message (WARNING "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal nanodbc library")
set (USE_NANODBC 0)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/include")
message (ERROR "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal unixodbc needed for nanodbc")
set (USE_NANODBC 0)
return()
endif()
set (USE_NANODBC 1)
set (NANODBC_LIBRARY nanodbc)
set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbce")
message (STATUS "Using nanodbc=${USE_NANODBC}: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
message (STATUS "Using unixodbc")

View File

@ -326,6 +326,10 @@ if (USE_LIBPQXX)
add_subdirectory (libpqxx-cmake)
endif()
if (USE_NANODBC)
add_subdirectory (nanodbc-cmake)
endif()
if (USE_NURAFT)
add_subdirectory(nuraft-cmake)
endif()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit c35819f2c8a378d4ba88cc930c17bc20aeb875eb
Subproject commit d2feb5978b979729a07c3ca76eaa4ab94cef4ceb

1
contrib/nanodbc vendored Submodule

@ -0,0 +1 @@
Subproject commit 9fc459675515d491401727ec67fca38db721f28c

View File

@ -0,0 +1,14 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/nanodbc)
if (NOT TARGET unixodbc)
message(FATAL_ERROR "Configuration error: unixodbc is not a target")
endif()
set (SRCS
${LIBRARY_DIR}/nanodbc/nanodbc.cpp
)
add_library(nanodbc ${SRCS})
target_link_libraries (nanodbc PUBLIC unixodbc)
target_include_directories (nanodbc SYSTEM PUBLIC ${LIBRARY_DIR}/)

View File

@ -300,6 +300,7 @@ function run_tests
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01776_decrypt_aead_size_check # Depends on OpenSSL
01811_filter_by_null # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent

View File

@ -58,8 +58,7 @@ ClickHouse artificially executes `INSERT` longer (adds sleep) so that the
## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}
If the number of inactive parts in a single partition more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts` exception.
If the number of inactive parts in a single partition more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the "Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts" exception.
Possible values:

View File

@ -1565,6 +1565,17 @@ Possible values:
Default value: 0
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1 (since it requires `optimize_skip_unused_shards` anyway, which `0` by default)
## allow_nondeterministic_optimize_skip_unused_shards {#allow-nondeterministic-optimize-skip-unused-shards}
Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats with updates) functions in sharding key.

View File

@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Структура таблицы может отличаться от исходной структуры таблицы PostgreSQL:
- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.

View File

@ -55,6 +55,26 @@ Eсли число кусков в партиции превышает знач
ClickHouse искусственно выполняет `INSERT` дольше (добавляет sleep), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются.
## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}
Если число неактивных кусков в партиции превышает значение `inactive_parts_to_throw_insert`, `INSERT` прерывается с исключением «Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts».
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
## inactive_parts_to_delay_insert {#inactive-parts-to-delay-insert}
Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, `INSERT` искусственно замедляется. Это полезно, когда сервер не может быстро очистить неактивные куски.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
## max_delay_to_insert {#max-delay-to-insert}
Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert).

View File

@ -15,10 +15,12 @@
- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor)
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
!!! note "Примечание"
Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.

View File

@ -222,8 +222,8 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
String pk_column = primary_key_expr_list->children[i]->getColumnName();
if (pk_column != sorting_key_column)
throw Exception("Primary key must be a prefix of the sorting key, but in position "
+ toString(i) + " its column is " + pk_column + ", not " + sorting_key_column,
throw Exception("Primary key must be a prefix of the sorting key, but the column in the position "
+ toString(i) + " is " + sorting_key_column +", not " + pk_column,
ErrorCodes::BAD_ARGUMENTS);
if (!primary_key_columns_set.emplace(pk_column).second)

View File

@ -17,7 +17,6 @@ add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
target_link_libraries(clickhouse-library-bridge PRIVATE
daemon
dbms
clickhouse_parsers
bridge
)

View File

@ -26,11 +26,12 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
dbms
bridge
clickhouse_parsers
Poco::Data
Poco::Data::ODBC
nanodbc
unixodbc
)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro)
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)

View File

@ -2,29 +2,36 @@
#if USE_ODBC
# include <DataTypes/DataTypeFactory.h>
# include <DataTypes/DataTypeNullable.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Parsers/ParserQueryWithOutput.h>
# include <Parsers/parseQuery.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Server/HTTP/HTMLForm.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <Poco/NumberParser.h>
# include <common/logger_useful.h>
# include <Common/quoteString.h>
# include <ext/scope_guard.h>
# include "getIdentifierQuote.h"
# include "validateODBCConnectionString.h"
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Server/HTTP/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/NumberParser.h>
#include <common/logger_useful.h>
#include <Common/quoteString.h>
#include <ext/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
#include <sql.h>
#include <sqlext.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
namespace
{
DataTypePtr getDataType(SQLSMALLINT type)
@ -59,6 +66,7 @@ namespace
}
}
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request, request.getStream());
@ -77,88 +85,79 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
process_error("No 'table' param in request URL");
return;
}
if (!params.has("connection_string"))
{
process_error("No 'connection_string' in request URL");
return;
}
std::string schema_name;
std::string table_name = params.get("table");
std::string connection_string = params.get("connection_string");
if (params.has("schema"))
{
schema_name = params.get("schema");
LOG_TRACE(log, "Will fetch info for table '{}'", schema_name + "." + table_name);
}
else
LOG_TRACE(log, "Will fetch info for table '{}'", table_name);
LOG_TRACE(log, "Got connection str '{}'", connection_string);
try
{
const bool external_table_functions_use_nulls = Poco::NumberParser::parseBool(params.get("external_table_functions_use_nulls", "false"));
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
SQLHSTMT hstmt = nullptr;
nanodbc::catalog catalog(*connection);
std::string catalog_name;
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLAllocStmt(hdbc, &hstmt)))
throw POCO_SQL_ODBC_CLASS::ODBCException("Could not allocate connection handle.");
SCOPE_EXIT(SQLFreeStmt(hstmt, SQL_DROP));
const auto & context_settings = getContext()->getSettingsRef();
/// TODO Why not do SQLColumns instead?
std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." + backQuoteIfNeed(table_name);
WriteBufferFromOwnString buf;
std::string input = "SELECT * FROM " + name + " WHERE 1 = 0";
ParserQueryWithOutput parser(input.data() + input.size());
ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth);
IAST::FormatSettings settings(buf, true);
settings.always_quote_identifiers = true;
settings.identifier_quoting_style = getQuotingStyle(hdbc);
select->format(settings);
std::string query = buf.str();
LOG_TRACE(log, "Inferring structure with query '{}'", query);
if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(query.data()), query.size())))
throw POCO_SQL_ODBC_CLASS::DescriptorException(session.dbc());
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLExecute(hstmt)))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
SQLSMALLINT cols = 0;
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLNumResultCols(hstmt, &cols)))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
/// TODO cols not checked
NamesAndTypesList columns;
for (SQLSMALLINT ncol = 1; ncol <= cols; ++ncol)
/// In XDBC tables it is allowed to pass either database_name or schema_name in table definion, but not both of them.
/// They both are passed as 'schema' parameter in request URL, so it is not clear whether it is database_name or schema_name passed.
/// If it is schema_name then we know that database is added in odbc.ini. But if we have database_name as 'schema',
/// it is not guaranteed. For nanodbc database_name must be either in odbc.ini or passed as catalog_name.
auto get_columns = [&]()
{
SQLSMALLINT type = 0;
/// TODO Why 301?
SQLCHAR column_name[301];
SQLSMALLINT is_nullable;
const auto result = POCO_SQL_ODBC_CLASS::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, &is_nullable);
if (POCO_SQL_ODBC_CLASS::Utility::isError(result))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
auto column_type = getDataType(type);
if (external_table_functions_use_nulls && is_nullable == SQL_NULLABLE)
nanodbc::catalog::tables tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ "", /* catalog = */ schema_name);
if (tables.next())
{
column_type = std::make_shared<DataTypeNullable>(column_type);
catalog_name = tables.table_catalog();
LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name);
}
columns.emplace_back(reinterpret_cast<char *>(column_name), std::move(column_type));
tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ schema_name);
if (tables.next())
{
catalog_name = tables.table_catalog();
LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name);
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} not found", schema_name.empty() ? table_name : schema_name + '.' + table_name);
};
nanodbc::catalog::columns columns_definition = get_columns();
NamesAndTypesList columns;
while (columns_definition.next())
{
SQLSMALLINT type = columns_definition.sql_data_type();
std::string column_name = columns_definition.column_name();
bool is_nullable = columns_definition.nullable() == SQL_NULLABLE;
auto column_type = getDataType(type);
if (external_table_functions_use_nulls && is_nullable == SQL_NULLABLE)
column_type = std::make_shared<DataTypeNullable>(column_type);
columns.emplace_back(column_name, std::move(column_type));
}
if (columns.empty())
throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{

View File

@ -2,16 +2,13 @@
#if USE_ODBC
# include <Interpreters/Context_fwd.h>
# include <Server/HTTP/HTTPRequestHandler.h>
# include <Common/config.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Common/config.h>
#include <Poco/Logger.h>
# include <Poco/Logger.h>
/** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0".
* TODO: It would be much better to utilize ODBC methods dedicated for columns description.
* If there is no such table, an exception is thrown.
*/
namespace DB
{
@ -19,7 +16,9 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_), log(&Poco::Logger::get("ODBCColumnsInfoHandler")), keep_alive_timeout(keep_alive_timeout_)
: WithContext(context_)
, log(&Poco::Logger::get("ODBCColumnsInfoHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -38,9 +38,9 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
return nullptr;
#endif
else if (uri.getPath() == "/write")
return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, getContext(), "write");
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
else
return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, getContext(), "read");
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
}
return nullptr;
}

View File

@ -6,14 +6,8 @@
#include "IdentifierQuoteHandler.h"
#include "MainHandler.h"
#include "SchemaAllowedHandler.h"
#include <Poco/Logger.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <Poco/Data/SessionPool.h>
#pragma GCC diagnostic pop
namespace DB
{
@ -24,9 +18,11 @@ class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_), log(&Poco::Logger::get(name_)), name(name_), keep_alive_timeout(keep_alive_timeout_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
pool_map = std::make_shared<ODBCHandler::PoolMap>();
}
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
@ -35,7 +31,6 @@ private:
Poco::Logger * log;
std::string name;
size_t keep_alive_timeout;
std::shared_ptr<ODBCHandler::PoolMap> pool_map;
};
}

View File

@ -2,23 +2,20 @@
#if USE_ODBC
# include <DataTypes/DataTypeFactory.h>
# include <Server/HTTP/HTMLForm.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Parsers/ParserQueryWithOutput.h>
# include <Parsers/parseQuery.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <common/logger_useful.h>
# include <ext/scope_guard.h>
# include "getIdentifierQuote.h"
# include "validateODBCConnectionString.h"
#include <DataTypes/DataTypeFactory.h>
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
@ -44,10 +41,12 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
try
{
std::string connection_string = params.get("connection_string");
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
auto identifier = getIdentifierQuote(hdbc);
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
auto identifier = getIdentifierQuote(*connection);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try

View File

@ -11,11 +11,13 @@
namespace DB
{
class IdentifierQuoteHandler : public HTTPRequestHandler
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
{
public:
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr)
: log(&Poco::Logger::get("IdentifierQuoteHandler")), keep_alive_timeout(keep_alive_timeout_)
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("IdentifierQuoteHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -18,18 +18,17 @@
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <common/logger_useful.h>
#include <Server/HTTP/HTMLForm.h>
#include "ODBCConnectionFactory.h"
#include <mutex>
#include <memory>
#include <nanodbc/nanodbc.h>
#if USE_ODBC
#include <Poco/Data/ODBC/SessionImpl.h>
#define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
#endif
namespace DB
{
namespace
{
std::unique_ptr<Block> parseColumns(std::string && column_string)
@ -42,37 +41,6 @@ namespace
}
}
using PocoSessionPoolConstructor = std::function<std::shared_ptr<Poco::Data::SessionPool>()>;
/** Is used to adjust max size of default Poco thread pool. See issue #750
* Acquire the lock, resize pool and construct new Session.
*/
static std::shared_ptr<Poco::Data::SessionPool> createAndCheckResizePocoSessionPool(PocoSessionPoolConstructor pool_constr)
{
static std::mutex mutex;
Poco::ThreadPool & pool = Poco::ThreadPool::defaultPool();
/// NOTE: The lock don't guarantee that external users of the pool don't change its capacity
std::unique_lock lock(mutex);
if (pool.available() == 0)
pool.addCapacity(2 * std::max(pool.capacity(), 1));
return pool_constr();
}
ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
{
std::lock_guard lock(mutex);
if (!pool_map->count(connection_str))
{
pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str]
{
return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str));
}));
}
return pool_map->at(connection_str);
}
void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message)
{
@ -82,6 +50,7 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
LOG_WARNING(log, message);
}
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request);
@ -141,6 +110,10 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
try
{
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
if (mode == "write")
{
if (!params.has("db_name"))
@ -159,15 +132,12 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
auto quoting_style = IdentifierQuotingStyle::None;
#if USE_ODBC
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
quoting_style = getQuotingStyle(session.dbc().handle());
quoting_style = getQuotingStyle(*connection);
#endif
auto pool = getPool(connection_string);
auto & read_buf = request.getStream();
auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, getContext(), max_block_size);
auto input_stream = std::make_shared<InputStreamFromInputFormat>(input_format);
ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style);
ODBCBlockOutputStream output_stream(*connection, db_name, table_name, *sample_block, getContext(), quoting_style);
copyData(*input_stream, output_stream);
writeStringBinary("Ok.", out);
}
@ -176,10 +146,8 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
std::string query = params.get("query");
LOG_TRACE(log, "Query: {}", query);
BlockOutputStreamPtr writer
= FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext());
auto pool = getPool(connection_string);
ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext());
ODBCBlockInputStream inp(*connection, query, *sample_block, max_block_size);
copyData(inp, *writer);
}
}

View File

@ -2,13 +2,8 @@
#include <Interpreters/Context_fwd.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Poco/Logger.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <Poco/Data/SessionPool.h>
#pragma GCC diagnostic pop
#include <mutex>
#include <unordered_map>
@ -24,16 +19,12 @@ namespace DB
class ODBCHandler : public HTTPRequestHandler, WithContext
{
public:
using PoolPtr = std::shared_ptr<Poco::Data::SessionPool>;
using PoolMap = std::unordered_map<std::string, PoolPtr>;
ODBCHandler(std::shared_ptr<PoolMap> pool_map_,
ODBCHandler(
size_t keep_alive_timeout_,
ContextPtr context_,
const String & mode_)
: WithContext(context_)
, log(&Poco::Logger::get("ODBCHandler"))
, pool_map(pool_map_)
, keep_alive_timeout(keep_alive_timeout_)
, mode(mode_)
{
@ -44,13 +35,11 @@ public:
private:
Poco::Logger * log;
std::shared_ptr<PoolMap> pool_map;
size_t keep_alive_timeout;
String mode;
static inline std::mutex mutex;
PoolPtr getPool(const std::string & connection_str);
void processError(HTTPServerResponse & response, const std::string & message);
};

View File

@ -1,5 +1,7 @@
#include "ODBCBlockInputStream.h"
#include <vector>
#include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
@ -14,137 +16,143 @@ namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int UNKNOWN_TYPE;
}
ODBCBlockInputStream::ODBCBlockInputStream(
Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_)
: session{session_}
, statement{(this->session << query_str, Poco::Data::Keywords::now)}
, result{statement}
, iterator{result.begin()}
nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_)
: log(&Poco::Logger::get("ODBCBlockInputStream"))
, max_block_size{max_block_size_}
, log(&Poco::Logger::get("ODBCBlockInputStream"))
, connection(connection_)
, query(query_str)
{
if (sample_block.columns() != result.columnCount())
throw Exception{"RecordSet contains " + toString(result.columnCount()) + " columns while " + toString(sample_block.columns())
+ " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
description.init(sample_block);
}
namespace
{
using ValueType = ExternalResultDescription::ValueType;
void insertValue(IColumn & column, const ValueType type, const Poco::Dynamic::Var & value)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(value.convert<String>());
break;
case ValueType::vtDate:
{
Poco::DateTime date = value.convert<Poco::DateTime>();
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate(date.year(), date.month(), date.day()).getDayNum()});
break;
}
case ValueType::vtDateTime:
{
Poco::DateTime datetime = value.convert<Poco::DateTime>();
assert_cast<ColumnUInt32 &>(column).insertValue(DateLUT::instance().makeDateTime(
datetime.year(), datetime.month(), datetime.day(), datetime.hour(), datetime.minute(), datetime.second()));
break;
}
case ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.convert<std::string>()));
break;
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
}
void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
result = execute(connection, NANODBC_TEXT(query));
}
Block ODBCBlockInputStream::readImpl()
{
if (iterator == result.end())
return {};
MutableColumns columns(description.sample_block.columns());
for (const auto i : ext::range(0, columns.size()))
columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty();
if (finished)
return Block();
MutableColumns columns(description.sample_block.cloneEmptyColumns());
size_t num_rows = 0;
while (iterator != result.end())
while (true)
{
Poco::Data::Row & row = *iterator;
for (const auto idx : ext::range(0, row.fieldCount()))
if (!result.next())
{
/// TODO This is extremely slow.
const Poco::Dynamic::Var & value = row[idx];
finished = true;
break;
}
if (!value.isEmpty())
for (int idx = 0; idx < result.columns(); ++idx)
{
const auto & sample = description.sample_block.getByPosition(idx);
if (!result.is_null(idx))
{
if (description.types[idx].second)
bool is_nullable = description.types[idx].second;
if (is_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), data_type.getNestedType(), description.types[idx].first, result, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
insertValue(*columns[idx], description.types[idx].first, value);
{
insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx);
}
}
else
insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column);
insertDefaultValue(*columns[idx], *sample.column);
}
++iterator;
++num_rows;
if (num_rows == max_block_size)
if (++num_rows == max_block_size)
break;
}
return description.sample_block.cloneWithColumns(std::move(columns));
}
void ODBCBlockInputStream::insertValue(
IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(row.get<uint32_t>(idx));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(row.get<uint64_t>(idx));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(row.get<int16_t>(idx));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(row.get<int16_t>(idx));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(row.get<int32_t>(idx));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(row.get<int64_t>(idx));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(row.get<float>(idx));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(row.get<double>(idx));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(row.get<std::string>(idx));
break;
case ValueType::vtUUID:
{
auto value = row.get<std::string>(idx);
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
}
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{row.get<std::string>(idx)}.getDayNum()});
break;
case ValueType::vtDateTime:
{
auto value = row.get<std::string>(idx);
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in);
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ValueType::vtDateTime64:[[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal256:
{
auto value = row.get<std::string>(idx);
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
}
}

View File

@ -3,10 +3,8 @@
#include <string>
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Poco/Data/RecordSet.h>
#include <Poco/Data/Session.h>
#include <Poco/Data/Statement.h>
#include <Core/ExternalResultDescription.h>
#include <nanodbc/nanodbc.h>
namespace DB
@ -15,25 +13,33 @@ namespace DB
class ODBCBlockInputStream final : public IBlockInputStream
{
public:
ODBCBlockInputStream(
Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_);
ODBCBlockInputStream(nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_);
String getName() const override { return "ODBC"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
using QueryResult = std::shared_ptr<nanodbc::result>;
using ValueType = ExternalResultDescription::ValueType;
Block readImpl() override;
Poco::Data::Session session;
Poco::Data::Statement statement;
Poco::Data::RecordSet result;
Poco::Data::RecordSet::Iterator iterator;
static void insertValue(IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx);
static void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
Poco::Logger * log;
const UInt64 max_block_size;
ExternalResultDescription description;
Poco::Logger * log;
nanodbc::connection & connection;
nanodbc::result result;
String query;
bool finished = false;
};
}

View File

@ -8,16 +8,14 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include "getIdentifierQuote.h"
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Formats/FormatFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_TYPE;
}
namespace
{
using ValueType = ExternalResultDescription::ValueType;
@ -40,69 +38,21 @@ namespace
return buf.str();
}
std::string getQuestionMarks(size_t n)
{
std::string result = "(";
for (size_t i = 0; i < n; ++i)
{
if (i > 0)
result += ",";
result += "?";
}
return result + ")";
}
Poco::Dynamic::Var getVarFromField(const Field & field, const ValueType type)
{
switch (type)
{
case ValueType::vtUInt8:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt16:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt32:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt64:
return Poco::Dynamic::Var(field.get<UInt64>()).convert<UInt64>();
case ValueType::vtInt8:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt16:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt32:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt64:
return Poco::Dynamic::Var(field.get<Int64>()).convert<Int64>();
case ValueType::vtFloat32:
return Poco::Dynamic::Var(field.get<Float64>()).convert<Float64>();
case ValueType::vtFloat64:
return Poco::Dynamic::Var(field.get<Float64>()).convert<Float64>();
case ValueType::vtString:
return Poco::Dynamic::Var(field.get<String>()).convert<String>();
case ValueType::vtDate:
return Poco::Dynamic::Var(LocalDate(DayNum(field.get<UInt64>())).toString()).convert<String>();
case ValueType::vtDateTime:
return Poco::Dynamic::Var(DateLUT::instance().timeToString(time_t(field.get<UInt64>()))).convert<String>();
case ValueType::vtUUID:
return Poco::Dynamic::Var(UUID(field.get<UInt128>()).toUnderType().toHexString()).convert<std::string>();
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
__builtin_unreachable();
}
}
ODBCBlockOutputStream::ODBCBlockOutputStream(Poco::Data::Session && session_,
ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::connection & connection_,
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const Block & sample_block_,
ContextPtr local_context_,
IdentifierQuotingStyle quoting_)
: session(session_)
: log(&Poco::Logger::get("ODBCBlockOutputStream"))
, connection(connection_)
, db_name(remote_database_name_)
, table_name(remote_table_name_)
, sample_block(sample_block_)
, local_context(local_context_)
, quoting(quoting_)
, log(&Poco::Logger::get("ODBCBlockOutputStream"))
{
description.init(sample_block);
}
@ -114,28 +64,12 @@ Block ODBCBlockOutputStream::getHeader() const
void ODBCBlockOutputStream::write(const Block & block)
{
ColumnsWithTypeAndName columns;
for (size_t i = 0; i < block.columns(); ++i)
columns.push_back({block.getColumns()[i], sample_block.getDataTypes()[i], sample_block.getNames()[i]});
WriteBufferFromOwnString values_buf;
auto writer = FormatFactory::instance().getOutputStream("Values", values_buf, sample_block, local_context);
writer->write(block);
std::vector<Poco::Dynamic::Var> row_to_insert(block.columns());
Poco::Data::Statement statement(session << getInsertQuery(db_name, table_name, columns, quoting) + getQuestionMarks(block.columns()));
for (size_t i = 0; i < block.columns(); ++i)
statement.addBind(Poco::Data::Keywords::use(row_to_insert[i]));
for (size_t i = 0; i < block.rows(); ++i)
{
for (size_t col_idx = 0; col_idx < block.columns(); ++col_idx)
{
Field val;
columns[col_idx].column->get(i, val);
if (val.isNull())
row_to_insert[col_idx] = Poco::Dynamic::Var();
else
row_to_insert[col_idx] = getVarFromField(val, description.types[col_idx].first);
}
statement.execute();
}
std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str();
execute(connection, query);
}
}

View File

@ -2,30 +2,41 @@
#include <Core/Block.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Poco/Data/Session.h>
#include <Core/ExternalResultDescription.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <Interpreters/Context_fwd.h>
#include <nanodbc/nanodbc.h>
namespace DB
{
class ODBCBlockOutputStream : public IBlockOutputStream
{
public:
ODBCBlockOutputStream(Poco::Data::Session && session_, const std::string & remote_database_name_,
const std::string & remote_table_name_, const Block & sample_block_, IdentifierQuotingStyle quoting);
ODBCBlockOutputStream(
nanodbc::connection & connection_,
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const Block & sample_block_,
ContextPtr local_context_,
IdentifierQuotingStyle quoting);
Block getHeader() const override;
void write(const Block & block) override;
private:
Poco::Data::Session session;
Poco::Logger * log;
nanodbc::connection & connection;
std::string db_name;
std::string table_name;
Block sample_block;
ContextPtr local_context;
IdentifierQuotingStyle quoting;
ExternalResultDescription description;
Poco::Logger * log;
};
}

View File

@ -0,0 +1,82 @@
#pragma once
#include <common/logger_useful.h>
#include <nanodbc/nanodbc.h>
#include <mutex>
#include <Common/BorrowedObjectPool.h>
#include <unordered_map>
namespace nanodbc
{
static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100;
using ConnectionPtr = std::shared_ptr<nanodbc::connection>;
using Pool = BorrowedObjectPool<ConnectionPtr>;
using PoolPtr = std::shared_ptr<Pool>;
class ConnectionHolder
{
public:
ConnectionHolder(const std::string & connection_string_, PoolPtr pool_) : connection_string(connection_string_), pool(pool_) {}
~ConnectionHolder()
{
if (connection)
pool->returnObject(std::move(connection));
}
nanodbc::connection & operator*()
{
if (!connection)
{
pool->borrowObject(connection, [&]()
{
return std::make_shared<nanodbc::connection>(connection_string, ODBC_CONNECT_TIMEOUT);
});
}
return *connection;
}
private:
std::string connection_string;
PoolPtr pool;
ConnectionPtr connection;
};
}
namespace DB
{
class ODBCConnectionFactory final : private boost::noncopyable
{
public:
static ODBCConnectionFactory & instance()
{
static ODBCConnectionFactory ret;
return ret;
}
nanodbc::ConnectionHolder get(const std::string & connection_string, size_t pool_size)
{
std::lock_guard lock(mutex);
if (!factory.count(connection_string))
factory.emplace(std::make_pair(connection_string, std::make_shared<nanodbc::Pool>(pool_size)));
return nanodbc::ConnectionHolder(connection_string, factory[connection_string]);
}
private:
/// [connection_settings_string] -> [connection_pool]
using PoolFactory = std::unordered_map<std::string, nanodbc::PoolPtr>;
PoolFactory factory;
std::mutex mutex;
};
}

View File

@ -2,33 +2,26 @@
#if USE_ODBC
# include <Server/HTTP/HTMLForm.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <common/logger_useful.h>
# include "validateODBCConnectionString.h"
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <common/logger_useful.h>
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
#include <sql.h>
#include <sqlext.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
namespace
{
bool isSchemaAllowed(SQLHDBC hdbc)
bool isSchemaAllowed(nanodbc::connection & connection)
{
SQLUINTEGER value;
SQLSMALLINT value_length = sizeof(value);
SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_SCHEMA_USAGE, &value, sizeof(value), &value_length);
if (POCO_SQL_ODBC_CLASS::Utility::isError(r))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
return value != 0;
uint32_t result = connection.get_info<uint32_t>(SQL_SCHEMA_USAGE);
return result != 0;
}
}
@ -55,10 +48,12 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
try
{
std::string connection_string = params.get("connection_string");
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
bool result = isSchemaAllowed(hdbc);
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
bool result = isSchemaAllowed(*connection);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try

View File

@ -1,22 +1,25 @@
#pragma once
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Poco/Logger.h>
#if USE_ODBC
namespace DB
{
class Context;
/// This handler establishes connection to database, and retrieves whether schema is allowed.
class SchemaAllowedHandler : public HTTPRequestHandler
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
{
public:
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr)
: log(&Poco::Logger::get("SchemaAllowedHandler")), keep_alive_timeout(keep_alive_timeout_)
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("SchemaAllowedHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -2,11 +2,10 @@
#if USE_ODBC
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
#include <common/logger_useful.h>
#include <nanodbc/nanodbc.h>
#include <sql.h>
#include <sqlext.h>
namespace DB
@ -17,33 +16,16 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
std::string getIdentifierQuote(SQLHDBC hdbc)
std::string getIdentifierQuote(nanodbc::connection & connection)
{
std::string identifier;
SQLSMALLINT t;
SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_IDENTIFIER_QUOTE_CHAR, nullptr, 0, &t);
if (POCO_SQL_ODBC_CLASS::Utility::isError(r))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
if (t > 0)
{
// I have no idea, why to add '2' here, got from: contrib/poco/Data/ODBC/src/ODBCStatementImpl.cpp:60 (SQL_DRIVER_NAME)
identifier.resize(static_cast<std::size_t>(t) + 2);
if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLGetInfo(
hdbc, SQL_IDENTIFIER_QUOTE_CHAR, &identifier[0], SQLSMALLINT((identifier.length() - 1) * sizeof(identifier[0])), &t)))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
identifier.resize(static_cast<std::size_t>(t));
}
return identifier;
return connection.get_info<std::string>(SQL_IDENTIFIER_QUOTE_CHAR);
}
IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc)
IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection)
{
auto identifier_quote = getIdentifierQuote(hdbc);
auto identifier_quote = getIdentifierQuote(connection);
if (identifier_quote.length() == 0)
return IdentifierQuotingStyle::None;
else if (identifier_quote[0] == '`')

View File

@ -2,20 +2,19 @@
#if USE_ODBC
# include <Interpreters/Context.h>
# include <Poco/Logger.h>
# include <Poco/Net/HTTPRequestHandler.h>
# include <Poco/Data/ODBC/Utility.h>
#include <Interpreters/Context.h>
#include <Poco/Logger.h>
#include <Poco/Net/HTTPRequestHandler.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <nanodbc/nanodbc.h>
namespace DB
{
std::string getIdentifierQuote(SQLHDBC hdbc);
std::string getIdentifierQuote(nanodbc::connection & connection);
IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc);
IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection);
}

View File

@ -750,6 +750,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setClustersConfig(config);
global_context->setMacros(std::make_unique<Macros>(*config, "macros", log));
global_context->setExternalAuthenticatorsConfig(*config);
global_context->setExternalModelsConfig(config);
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
if (config->has("max_table_size_to_drop"))
@ -878,10 +879,30 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers_to_start_before_tables->emplace_back(
port_name,
std::make_unique<Poco::Net::TCPServer>(
new KeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString());
});
const char * secure_port_name = "keeper_server.tcp_port_secure";
createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port)
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
servers_to_start_before_tables->emplace_back(
secure_port_name,
std::make_unique<Poco::Net::TCPServer>(
new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString());
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
});
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
@ -1302,7 +1323,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
/// try to load dictionaries immediately, throw on error and die
ext::scope_guard dictionaries_xmls, models_xmls;
ext::scope_guard dictionaries_xmls;
try
{
if (!config().getBool("dictionaries_lazy_load", true))
@ -1312,8 +1333,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "dictionaries_config"));
models_xmls = global_context->getExternalModelsLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "models_config"));
}
catch (...)
{

View File

@ -13,17 +13,25 @@ namespace ErrorCodes
void AggregateFunctionCombinatorFactory::registerCombinator(const AggregateFunctionCombinatorPtr & value)
{
if (!dict.emplace(value->getName(), value).second)
throw Exception("AggregateFunctionCombinatorFactory: the name '" + value->getName() + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
CombinatorPair pair{
.name = value->getName(),
.combinator_ptr = value,
};
/// lower_bound() cannot be used since sort order of the dict is by length of the combinator
/// but there are just a few combiners, so not a problem.
if (std::find(dict.begin(), dict.end(), pair) != dict.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionCombinatorFactory: the name '{}' is not unique",
value->getName());
dict.emplace(std::lower_bound(dict.begin(), dict.end(), pair), pair);
}
AggregateFunctionCombinatorPtr AggregateFunctionCombinatorFactory::tryFindSuffix(const std::string & name) const
{
/// O(N) is ok for just a few combinators.
for (const auto & suffix_value : dict)
if (endsWith(name, suffix_value.first))
return suffix_value.second;
if (endsWith(name, suffix_value.name))
return suffix_value.combinator_ptr;
return {};
}

View File

@ -15,7 +15,17 @@ namespace DB
class AggregateFunctionCombinatorFactory final: private boost::noncopyable
{
private:
using Dict = std::unordered_map<std::string, AggregateFunctionCombinatorPtr>;
struct CombinatorPair
{
std::string name;
AggregateFunctionCombinatorPtr combinator_ptr;
bool operator==(const CombinatorPair & rhs) const { return name == rhs.name; }
/// Sort by the length of the combinator name for proper tryFindSuffix()
/// for combiners with common prefix (i.e. "State" and "SimpleState").
bool operator<(const CombinatorPair & rhs) const { return name.length() > rhs.name.length(); }
};
using Dict = std::vector<CombinatorPair>;
Dict dict;
public:

View File

@ -55,7 +55,7 @@ std::vector<Connection *> HedgedConnectionsFactory::getManyConnections(PoolMode
{
size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1;
size_t max_entries;
size_t max_entries = 1;
switch (pool_mode)
{
case PoolMode::GET_ALL:

View File

@ -1,4 +1,5 @@
#include <Common/Config/AbstractConfigurationComparison.h>
#include <Common/getMultipleKeysFromConfig.h>
#include <unordered_set>
#include <common/StringRef.h>
@ -31,6 +32,23 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const P
return isSameConfiguration(left, key, right, key);
}
bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & root, const String & name)
{
if (&left == &right)
return true;
auto left_multiple_keys = getMultipleKeysFromConfig(left, root, name);
auto right_multiple_keys = getMultipleKeysFromConfig(right, root, name);
if (left_multiple_keys.size() != right_multiple_keys.size())
return false;
for (auto & key : left_multiple_keys)
if (!isSameConfiguration(left, right, concatKeyAndSubKey(root, key)))
return false;
return true;
}
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
const Poco::Util::AbstractConfiguration & right, const String & right_key)
{

View File

@ -13,6 +13,17 @@ namespace DB
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right);
/// Config may have multiple keys with one name. For example:
/// <root>
/// <some_key>...</some_key>
/// <some_key>...</some_key>
/// </root>
/// Returns true if the specified subview of the two configurations contains
/// the same keys and values for each key with the given name.
bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right,
const String & root, const String & name);
/// Returns true if the specified subview of the two configurations contains the same keys and values.
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right,

View File

@ -185,12 +185,20 @@ public:
/// Conversion of infinite values to integer is undefined.
throw Exception("Cannot convert infinite value to integer type", ErrorCodes::CANNOT_CONVERT_TYPE);
}
else if (x > std::numeric_limits<T>::max() || x < std::numeric_limits<T>::lowest())
{
throw Exception("Cannot convert out of range floating point value to integer type", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
if constexpr (std::is_same_v<Decimal256, T>)
{
return Int256(x);
}
else
{
return T(x);
}
}
T operator() (const UInt128 &) const

View File

@ -1,4 +1,9 @@
#include <Coordination/KeeperServer.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include <Coordination/LoggerWrapper.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStateManager.h>
@ -9,6 +14,7 @@
#include <chrono>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <string>
#include <Poco/Util/Application.h>
namespace DB
{
@ -16,6 +22,42 @@ namespace DB
namespace ErrorCodes
{
extern const int RAFT_ERROR;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int SUPPORT_IS_DISABLED;
}
namespace
{
#if USE_SSL
void setSSLParams(nuraft::asio_service::options & asio_opts)
{
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
String certificate_file_property = "openSSL.server.certificateFile";
String private_key_file_property = "openSSL.server.privateKeyFile";
String root_ca_file_property = "openSSL.server.caConfig";
if (!config.has(certificate_file_property))
throw Exception("Server certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
if (!config.has(private_key_file_property))
throw Exception("Server private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
asio_opts.enable_ssl_ = true;
asio_opts.server_cert_file_ = config.getString(certificate_file_property);
asio_opts.server_key_file_ = config.getString(private_key_file_property);
if (config.has(root_ca_file_property))
asio_opts.root_cert_file_ = config.getString(root_ca_file_property);
if (config.getBool("openSSL.server.loadDefaultCAFile", false))
asio_opts.load_default_ca_file_ = true;
if (config.getString("openSSL.server.verificationMode", "none") == "none")
asio_opts.skip_verification_ = true;
}
#endif
}
KeeperServer::KeeperServer(
@ -72,6 +114,15 @@ void KeeperServer::startup()
params.return_method_ = nuraft::raft_params::blocking;
nuraft::asio_service::options asio_opts{};
if (state_manager->isSecure())
{
#if USE_SSL
setSSLParams(asio_opts);
#else
throw Exception{"SSL support for NuRaft is disabled because ClickHouse was built without SSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
}
launchRaftServer(params, asio_opts);

View File

@ -12,6 +12,7 @@ namespace ErrorCodes
KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, my_port(port)
, secure(false)
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
@ -25,6 +26,7 @@ KeeperStateManager::KeeperStateManager(
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings)
: my_server_id(my_server_id_)
, secure(config.getBool(config_prefix + ".raft_configuration.secure", false))
, log_store(nuraft::cs_new<KeeperLogStore>(
config.getString(config_prefix + ".log_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/logs"),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync))
@ -37,6 +39,9 @@ KeeperStateManager::KeeperStateManager(
for (const auto & server_key : keys)
{
if (!startsWith(server_key, "server"))
continue;
std::string full_prefix = config_prefix + ".raft_configuration." + server_key;
int server_id = config.getInt(full_prefix + ".id");
std::string hostname = config.getString(full_prefix + ".hostname");
@ -44,6 +49,7 @@ KeeperStateManager::KeeperStateManager(
bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
int32_t priority = config.getInt(full_prefix + ".priority", 1);
bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
if (start_as_follower)
start_as_follower_servers.insert(server_id);
@ -57,6 +63,7 @@ KeeperStateManager::KeeperStateManager(
cluster_config->get_servers().push_back(peer_config);
}
if (!my_server_config)
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id);

View File

@ -52,6 +52,11 @@ public:
return start_as_follower_servers.count(my_server_id);
}
bool isSecure() const
{
return secure;
}
nuraft::ptr<KeeperLogStore> getLogStore() const { return log_store; }
uint64_t getTotalServers() const { return total_servers; }
@ -59,6 +64,7 @@ public:
private:
int my_server_id;
int my_port;
bool secure;
uint64_t total_servers{0};
std::unordered_set<int> start_as_follower_servers;
nuraft::ptr<KeeperLogStore> log_store;

View File

@ -78,7 +78,7 @@ class IColumn;
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "Number of threads performing background flush for tables with Buffer engine. Only has meaning at server startup.", 0) \
M(UInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \
M(UInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \
M(UInt64, background_fetches_pool_size, 3, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \
M(UInt64, background_fetches_pool_size, 8, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \
M(UInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, dns cache updates. Only has meaning at server startup.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "Number of threads performing background tasks for message streaming. Only has meaning at server startup.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
@ -118,6 +118,7 @@ class IColumn;
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards)", 0) \
M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
M(UInt64, optimize_skip_unused_shards_nesting, 0, "Same as optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \
@ -228,7 +229,7 @@ class IColumn;
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \
M(UInt64, http_max_uri_size, 16384, "Maximum URI length of HTTP request", 0) \
M(UInt64, http_max_uri_size, 1048576, "Maximum URI length of HTTP request", 0) \
M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
@ -372,6 +373,7 @@ class IColumn;
M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \
M(Int64, postgresql_connection_pool_wait_timeout, -1, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \
M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \
M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \
\
M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \

View File

@ -36,15 +36,15 @@
* int func() {
* #if USE_MULTITARGET_CODE
* if (isArchSupported(TargetArch::AVX2))
* return TargetSpecifc::AVX2::funcImpl();
* return TargetSpecific::AVX2::funcImpl();
* #endif
* return TargetSpecifc::Default::funcImpl();
* return TargetSpecific::Default::funcImpl();
* }
*
* Sometimes code may benefit from compiling with different options.
* For these purposes use DECLARE_MULTITARGET_CODE macros. It will create a copy
* of the code for every supported target and compile it with different options.
* These copies are available via TargetSpecifc namespaces described above.
* These copies are available via TargetSpecific namespaces described above.
*
* Inside every TargetSpecific namespace there is a constexpr variable BuildArch,
* which indicates the target platform for current code.
@ -106,7 +106,7 @@ String toString(TargetArch arch);
/* Clang shows warning when there aren't any objects to apply pragma.
* To prevent this warning we define this function inside every macros with pragmas.
*/
# define DUMMY_FUNCTION_DEFINITION void __dummy_function_definition();
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void __dummy_function_definition();
#else
# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("GCC push_options") \

View File

@ -3,12 +3,14 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/TransformDateTime64.h>
#include <IO/WriteHelpers.h>
@ -42,6 +44,7 @@ namespace
*/
class FunctionDateDiff : public IFunction
{
using ColumnDateTime64 = ColumnDecimal<DateTime64>;
public:
static constexpr auto name = "dateDiff";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDateDiff>(); }
@ -133,17 +136,21 @@ private:
dispatchForSecondColumn<Transform>(*x_vec_16, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnUInt32>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&x))
dispatchForSecondColumn<Transform>(*x_vec_64, y, timezone_x, timezone_y, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnUInt16>(&x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnUInt32>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), y, timezone_x, timezone_y, result);
else
throw Exception("Illegal column for first argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN);
throw Exception("Illegal column for first argument of function " + getName() + ", must be Date, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN);
}
template <typename Transform, typename T1>
template <typename Transform, typename LeftColumnType>
void dispatchForSecondColumn(
const ColumnVector<T1> & x, const IColumn & y,
const LeftColumnType & x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
@ -151,10 +158,14 @@ private:
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnUInt32>(&y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnUInt16>(&y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnUInt32>(&y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, result);
else
throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN);
}
@ -169,49 +180,81 @@ private:
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnUInt32>(&y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else
throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN);
}
template <typename Transform, typename T1, typename T2>
template <typename Transform, typename LeftColumnType, typename RightColumnType>
void vectorVector(
const ColumnVector<T1> & x, const ColumnVector<T2> & y,
const LeftColumnType & x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate<Transform>(x_data[i], y_data[i], timezone_x, timezone_y);
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
}
template <typename Transform, typename T1, typename T2>
template <typename Transform, typename LeftColumnType, typename T2>
void vectorConstant(
const ColumnVector<T1> & x, T2 y,
const LeftColumnType & x, T2 y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto y_value = stripDecimalFieldValue(y);
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate<Transform>(x_data[i], y, timezone_x, timezone_y);
result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y);
}
template <typename Transform, typename T1, typename T2>
template <typename Transform, typename T1, typename RightColumnType>
void constantVector(
T1 x, const ColumnVector<T2> & y,
T1 x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto x_value = stripDecimalFieldValue(x);
for (size_t i = 0, size = y.size(); i < size; ++i)
result[i] = calculate<Transform>(x, y_data[i], timezone_x, timezone_y);
result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y);
}
template <typename Transform, typename T1, typename T2>
Int64 calculate(T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
template <typename TransformX, typename TransformY, typename T1, typename T2>
Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
{
return Int64(Transform::execute(y, timezone_y))
- Int64(Transform::execute(x, timezone_x));
return Int64(transform_y.execute(y, timezone_y))
- Int64(transform_x.execute(x, timezone_x));
}
template <typename T>
static UInt32 getScale(const T & v)
{
if constexpr (std::is_same_v<T, ColumnDateTime64>)
return v.getScale();
else if constexpr (std::is_same_v<T, DecimalField<DateTime64>>)
return v.getScale();
return 0;
}
template <typename T>
static auto stripDecimalFieldValue(T && v)
{
if constexpr (std::is_same_v<std::decay_t<T>, DecimalField<DateTime64>>)
return v.getValue();
else
return v;
}
};

View File

@ -480,7 +480,7 @@ public:
// since right now LUT does not support Int64-values and not format instructions for subsecond parts,
// treat DatTime64 values just as DateTime values by ignoring fractional and casting to UInt32.
const auto c = DecimalUtils::split(vec[i], scale);
instruction.perform(pos, static_cast<UInt32>(c.whole), time_zone);
instruction.perform(pos, static_cast<Int64>(c.whole), time_zone);
}
}
else

View File

@ -773,7 +773,8 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
while (!buf.eof() && isNumericASCII(*buf.position()))
++buf.position();
}
else if (scale && (whole >= 1000000000LL * scale))
/// 9908870400 is time_t value for 2184-01-01 UTC (a bit over the last year supported by DateTime64)
else if (whole >= 9908870400LL)
{
/// Unix timestamp with subsecond precision, already scaled to integer.
/// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future.

View File

@ -259,7 +259,11 @@ void PocoHTTPClient::makeRequestInternal(
String error_message;
Poco::StreamCopier::copyToString(response_body_stream, error_message);
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
if (Aws::Http::IsRetryableHttpResponseCode(response->GetResponseCode()))
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
else
response->SetClientErrorType(Aws::Client::CoreErrors::USER_CANCELLED);
response->SetClientErrorMessage(error_message);
if (status_code == 429 || status_code == 503)

View File

@ -480,20 +480,15 @@ void CatBoostLibHolder::initAPI()
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
{
static std::weak_ptr<CatBoostLibHolder> ptr;
static std::shared_ptr<CatBoostLibHolder> ptr;
static std::mutex mutex;
std::lock_guard lock(mutex);
auto result = ptr.lock();
if (!result || result->getCurrentPath() != lib_path)
{
result = std::make_shared<CatBoostLibHolder>(lib_path);
/// This assignment is not atomic, which prevents from creating lock only inside 'if'.
ptr = result;
}
if (!ptr || ptr->getCurrentPath() != lib_path)
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
return result;
return ptr;
}
}

View File

@ -292,7 +292,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf
std::lock_guard lock(mutex);
/// If old congig is set, remove deleted clusters from impl, otherwise just clear it.
/// If old config is set, remove deleted clusters from impl, otherwise just clear it.
if (old_config)
{
for (const auto & key : deleted_keys)

View File

@ -55,6 +55,8 @@ public:
static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit);
public:
using SlotToShard = std::vector<UInt64>;
struct Address
{
/** In configuration file,
@ -232,7 +234,6 @@ public:
bool maybeCrossReplication() const;
private:
using SlotToShard = std::vector<UInt64>;
SlotToShard slot_to_shard;
public:

View File

@ -30,7 +30,7 @@ public:
virtual void createForShard(
const Cluster::ShardInfo & shard_info,
const String & query, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info,
std::vector<QueryPlanPtr> & res,

View File

@ -115,7 +115,7 @@ String formattedAST(const ASTPtr & ast)
void SelectStreamFactory::createForShard(
const Cluster::ShardInfo & shard_info,
const String &, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo &,
std::vector<QueryPlanPtr> & plans,

View File

@ -34,7 +34,7 @@ public:
void createForShard(
const Cluster::ShardInfo & shard_info,
const String & query, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info,
std::vector<QueryPlanPtr> & plans,

View File

@ -5,7 +5,7 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/ProcessList.h>
#include <Parsers/queryToString.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
@ -91,7 +91,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context,
void executeQuery(
QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info)
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster)
{
assert(log);
@ -104,9 +107,7 @@ void executeQuery(
Pipes remote_pipes;
Pipes delayed_pipes;
const std::string query = queryToString(query_ast);
auto new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log);
auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log);
new_context->getClientInfo().distributed_depth += 1;
@ -127,9 +128,28 @@ void executeQuery(
else
throttler = user_level_throttler;
for (const auto & shard_info : query_info.cluster->getShardsInfo())
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
stream_factory.createForShard(shard_info, query, query_ast,
ASTPtr query_ast_for_shard;
if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
query_ast_for_shard = query_ast->clone();
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_column_name,
shard_info,
not_optimized_cluster->getSlotToShard(),
};
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
else
query_ast_for_shard = query_ast;
stream_factory.createForShard(shard_info,
query_ast_for_shard,
new_context, throttler, query_info, plans,
remote_pipes, delayed_pipes, log);
}

View File

@ -8,11 +8,15 @@ namespace DB
struct Settings;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
struct SelectQueryInfo;
class Pipe;
class QueryPlan;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
namespace ClusterProxy
{
@ -35,7 +39,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context,
void executeQuery(
QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info);
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster);
}

View File

@ -29,6 +29,7 @@
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Core/Settings.h>
#include <Core/SettingsQuirks.h>
#include <Access/AccessControlManager.h>
@ -225,7 +226,6 @@ private:
void cleanThread()
{
setThreadName("SessionCleaner");
std::unique_lock lock{mutex};
while (true)
@ -338,6 +338,9 @@ struct ContextSharedPart
mutable std::optional<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::optional<ExternalDictionariesLoader> external_dictionaries_loader;
mutable std::optional<ExternalModelsLoader> external_models_loader;
ConfigurationPtr external_models_config;
ext::scope_guard models_repository_guard;
String default_profile_name; /// Default profile name used for default values.
String system_profile_name; /// Profile used by system processes
String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
@ -445,6 +448,7 @@ struct ContextSharedPart
system_logs.reset();
embedded_dictionaries.reset();
external_dictionaries_loader.reset();
models_repository_guard.reset();
external_models_loader.reset();
buffer_flush_schedule_pool.reset();
schedule_pool.reset();
@ -456,7 +460,6 @@ struct ContextSharedPart
trace_collector.reset();
/// Stop zookeeper connection
zookeeper.reset();
}
bool hasTraceCollector() const
@ -1353,11 +1356,29 @@ const ExternalModelsLoader & Context::getExternalModelsLoader() const
ExternalModelsLoader & Context::getExternalModelsLoader()
{
std::lock_guard lock(shared->external_models_mutex);
return getExternalModelsLoaderUnlocked();
}
ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked()
{
if (!shared->external_models_loader)
shared->external_models_loader.emplace(getGlobalContext());
return *shared->external_models_loader;
}
void Context::setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name)
{
std::lock_guard lock(shared->external_models_mutex);
if (shared->external_models_config && isSameConfigurationWithMultipleKeys(*config, *shared->external_models_config, "", config_name))
return;
shared->external_models_config = config;
shared->models_repository_guard .reset();
shared->models_repository_guard = getExternalModelsLoaderUnlocked().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(*config, config_name));
}
EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_error) const
{

View File

@ -499,8 +499,11 @@ public:
EmbeddedDictionaries & getEmbeddedDictionaries();
ExternalDictionariesLoader & getExternalDictionariesLoader();
ExternalModelsLoader & getExternalModelsLoader();
ExternalModelsLoader & getExternalModelsLoaderUnlocked();
void tryCreateEmbeddedDictionaries() const;
void setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name = "models_config");
/// I/O formats.
BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const;

View File

@ -0,0 +1,110 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <DataTypes/FieldToDataType.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
namespace
{
using namespace DB;
Field executeFunctionOnField(
const Field & field, const std::string & name,
const ExpressionActionsPtr & expr,
const std::string & sharding_key_column_name)
{
DataTypePtr type = applyVisitor(FieldToDataType{}, field);
ColumnWithTypeAndName column;
column.column = type->createColumnConst(1, field);
column.name = name;
column.type = type;
Block block{column};
size_t num_rows = 1;
expr->execute(block, num_rows);
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
return (*ret.column)[0];
}
/// Return true if shard may contain such value (or it is unknown), otherwise false.
bool shardContains(
const Field & sharding_column_value,
const std::string & sharding_column_name,
const ExpressionActionsPtr & expr,
const std::string & sharding_key_column_name,
const Cluster::ShardInfo & shard_info,
const Cluster::SlotToShard & slots)
{
/// NULL is not allowed in sharding key,
/// so it should be safe to assume that shard cannot contain it.
if (sharding_column_value.isNull())
return false;
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name);
UInt64 value = sharding_value.get<UInt64>();
const auto shard_num = slots[value % slots.size()] + 1;
return shard_info.shard_num == shard_num;
}
}
namespace DB
{
bool OptimizeShardingKeyRewriteInMatcher::needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/)
{
return true;
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTPtr & node, Data & data)
{
if (auto * function = node->as<ASTFunction>())
visit(*function, data);
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & data)
{
if (function.name != "in")
return;
auto * left = function.arguments->children.front().get();
auto * right = function.arguments->children.back().get();
auto * identifier = left->as<ASTIdentifier>();
if (!identifier)
return;
const auto & expr = data.sharding_key_expr;
const auto & sharding_key_column_name = data.sharding_key_column_name;
if (!expr->getRequiredColumnsWithTypes().contains(identifier->name()))
return;
/// NOTE: that we should not take care about empty tuple,
/// since after optimize_skip_unused_shards,
/// at least one element should match each shard.
if (auto * tuple_func = right->as<ASTFunction>(); tuple_func && tuple_func->name == "tuple")
{
auto * tuple_elements = tuple_func->children.front()->as<ASTExpressionList>();
std::erase_if(tuple_elements->children, [&](auto & child)
{
auto * literal = child->template as<ASTLiteral>();
return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
else if (auto * tuple_literal = right->as<ASTLiteral>();
tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple)
{
auto & tuple = tuple_literal->value.get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/Cluster.h>
namespace DB
{
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class ASTFunction;
/// Rewrite `sharding_key IN (...)` for specific shard,
/// so that it will contain only values that belong to this specific shard.
///
/// See also:
/// - evaluateExpressionOverConstantCondition()
/// - StorageDistributed::createSelector()
/// - createBlockSelector()
struct OptimizeShardingKeyRewriteInMatcher
{
/// Cluster::SlotToShard
using SlotToShard = std::vector<UInt64>;
struct Data
{
const ExpressionActionsPtr & sharding_key_expr;
const std::string & sharding_key_column_name;
const Cluster::ShardInfo & shard_info;
const Cluster::SlotToShard & slots;
};
static bool needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/);
static void visit(ASTPtr & node, Data & data);
static void visit(ASTFunction & function, Data & data);
};
using OptimizeShardingKeyRewriteInVisitor = InDepthNodeVisitor<OptimizeShardingKeyRewriteInMatcher, true>;
}

View File

@ -293,13 +293,11 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
else
{
ASTFunction * func = elem->as<ASTFunction>();
/// Never remove untuple. It's result column may be in required columns.
/// It is not easy to analyze untuple here, because types were not calculated yes.
if (func && func->name == "untuple")
for (const auto & col : required_result_columns)
if (col.rfind("_ut_", 0) == 0)
{
new_elements.push_back(elem);
break;
}
new_elements.push_back(elem);
}
}

View File

@ -118,6 +118,7 @@ SRCS(
OpenTelemetrySpanLog.cpp
OptimizeIfChains.cpp
OptimizeIfWithConstantConditionVisitor.cpp
OptimizeShardingKeyRewriteInVisitor.cpp
PartLog.cpp
PredicateExpressionsOptimizer.cpp
PredicateRewriteVisitor.cpp

View File

@ -78,7 +78,7 @@ void ArrowBlockInputFormat::prepareReader()
{
if (stream)
{
auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(asArrowFile(in));
auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique<ArrowInputStreamFromReadBuffer>(in));
if (!stream_reader_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while opening a table: {}", stream_reader_status.status().ToString());

View File

@ -55,26 +55,23 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Tell() const
{
return arrow::Result<int64_t>(in.getPosition());
return in.getPosition();
}
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out)
{
int64_t bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Result<int64_t>(bytes_read);
return in.readBig(reinterpret_cast<char *>(out), nbytes);
}
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
auto buffer_status = arrow::AllocateBuffer(nbytes);
ARROW_RETURN_NOT_OK(buffer_status);
ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes))
ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()))
auto shared_buffer = std::shared_ptr<arrow::Buffer>(std::move(std::move(*buffer_status)));
if (bytes_read < nbytes)
RETURN_NOT_OK(buffer->Resize(bytes_read));
size_t n = in.readBig(reinterpret_cast<char *>(shared_buffer->mutable_data()), nbytes);
auto read_buffer = arrow::SliceBuffer(shared_buffer, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(shared_buffer);
return buffer;
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)
@ -83,6 +80,43 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)
return arrow::Status::OK();
}
ArrowInputStreamFromReadBuffer::ArrowInputStreamFromReadBuffer(ReadBuffer & in_) : in(in_), is_open{true}
{
}
arrow::Result<int64_t> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes, void * out)
{
return in.readBig(reinterpret_cast<char *>(out), nbytes);
}
arrow::Result<std::shared_ptr<arrow::Buffer>> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes)
{
ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes))
ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()))
if (bytes_read < nbytes)
RETURN_NOT_OK(buffer->Resize(bytes_read));
return buffer;
}
arrow::Status ArrowInputStreamFromReadBuffer::Abort()
{
return arrow::Status();
}
arrow::Result<int64_t> ArrowInputStreamFromReadBuffer::Tell() const
{
return in.count();
}
arrow::Status ArrowInputStreamFromReadBuffer::Close()
{
is_open = false;
return arrow::Status();
}
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in)
{
if (auto * fd_in = dynamic_cast<ReadBufferFromFileDescriptor *>(&in))

View File

@ -61,6 +61,24 @@ private:
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
};
class ArrowInputStreamFromReadBuffer : public arrow::io::InputStream
{
public:
explicit ArrowInputStreamFromReadBuffer(ReadBuffer & in);
arrow::Result<int64_t> Read(int64_t nbytes, void* out) override;
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
arrow::Status Abort() override;
arrow::Result<int64_t> Tell() const override;
arrow::Status Close() override;
bool closed() const override { return !is_open; }
private:
ReadBuffer & in;
bool is_open = false;
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
};
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in);
}

View File

@ -5,6 +5,7 @@
#include <Poco/Net/NetException.h>
#include <common/logger_useful.h>
#include <Server/IServer.h>
#include <string>
namespace DB
{
@ -21,9 +22,9 @@ private:
void run() override {}
};
public:
KeeperTCPHandlerFactory(IServer & server_)
KeeperTCPHandlerFactory(IServer & server_, bool secure)
: server(server_)
, log(&Poco::Logger::get("KeeperTCPHandlerFactory"))
, log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory"))
{
}

View File

@ -1326,11 +1326,12 @@ String KeyCondition::toString() const
* The set of all possible tuples can be considered as an n-dimensional space, where n is the size of the tuple.
* A range of tuples specifies some subset of this space.
*
* Hyperrectangles (you can also find the term "rail")
* will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges.
* In this case, the one-dimensional range can be: a period, a segment, an interval, a half-interval, unlimited on the left, unlimited on the right ...
* Hyperrectangles will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges.
* In this case, the one-dimensional range can be:
* a point, a segment, an open interval, a half-open interval;
* unlimited on the left, unlimited on the right ...
*
* The range of tuples can always be represented as a combination of hyperrectangles.
* The range of tuples can always be represented as a combination (union) of hyperrectangles.
* For example, the range [ x1 y1 .. x2 y2 ] given x1 != x2 is equal to the union of the following three hyperrectangles:
* [x1] x [y1 .. +inf)
* (x1 .. x2) x (-inf .. +inf)

View File

@ -56,6 +56,8 @@
#include <boost/range/adaptor/filtered.hpp>
#include <boost/algorithm/string/join.hpp>
#include <ext/scope_guard_safe.h>
#include <algorithm>
#include <iomanip>
#include <optional>
@ -317,8 +319,8 @@ void MergeTreeData::checkProperties(
{
const String & pk_column = new_primary_key.column_names[i];
if (pk_column != sorting_key_column)
throw Exception("Primary key must be a prefix of the sorting key, but in position "
+ toString(i) + " its column is " + pk_column + ", not " + sorting_key_column,
throw Exception("Primary key must be a prefix of the sorting key, but the column in the position "
+ toString(i) + " is " + sorting_key_column +", not " + pk_column,
ErrorCodes::BAD_ARGUMENTS);
if (!primary_key_columns_set.emplace(pk_column).second)
@ -1211,14 +1213,21 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts_to_re
{
/// Parallel parts removal.
size_t num_threads = std::min(size_t(settings->max_part_removal_threads), parts_to_remove.size());
size_t num_threads = std::min<size_t>(settings->max_part_removal_threads, parts_to_remove.size());
ThreadPool pool(num_threads);
/// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
for (const DataPartPtr & part : parts_to_remove)
{
pool.scheduleOrThrowOnError([&]
pool.scheduleOrThrowOnError([&, thread_group = CurrentThread::getGroup()]
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
if (thread_group)
CurrentThread::attachTo(thread_group);
LOG_DEBUG(log, "Removing part from filesystem {}", part->name);
part->remove();
});
@ -2871,7 +2880,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK);
if (parts.empty())
throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART);
throw Exception("Nothing to move (сheck that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART);
parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr)
{

View File

@ -299,6 +299,10 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
auto column
= result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
if (column->onlyNull())
return false;
const auto * col_uint8 = typeid_cast<const ColumnUInt8 *>(column.get());
const NullMap * null_map = nullptr;

View File

@ -486,9 +486,13 @@ void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter)
ConstantFilterDescription const_description(*new_filter);
if (const_description.always_true)
{
setFilterConstTrue();
}
else if (const_description.always_false)
{
clear();
}
else
{
FilterDescription filter_description(*new_filter);

View File

@ -119,9 +119,13 @@ struct SelectQueryInfo
ASTPtr query;
ASTPtr view_query; /// Optimized VIEW query
/// For optimize_skip_unused_shards.
/// Can be modified in getQueryProcessingStage()
/// Cluster for the query.
ClusterPtr cluster;
/// Optimized cluster for the query.
/// In case of optimize_skip_unused_shards it may differs from original cluster.
///
/// Configured in StorageDistributed::getQueryProcessingStage()
ClusterPtr optimized_cluster;
TreeRewriterResultPtr syntax_analyzer_result;
@ -134,6 +138,8 @@ struct SelectQueryInfo
/// Prepared sets are used for indices by storage engine.
/// Example: x IN (1, 2, 3)
PreparedSets sets;
ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }
};
}

View File

@ -478,7 +478,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
"Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
makeFormattedListOfShards(optimized_cluster));
cluster = optimized_cluster;
query_info.cluster = cluster;
query_info.optimized_cluster = cluster;
}
else
{
@ -558,7 +558,7 @@ void StorageDistributed::read(
InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
/// Return directly (with correct header) if no shard to query.
if (query_info.cluster->getShardsInfo().empty())
if (query_info.getCluster()->getShardsInfo().empty())
{
Pipe pipe(std::make_shared<NullSource>(header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
@ -586,7 +586,9 @@ void StorageDistributed::read(
local_context->getExternalTables());
ClusterProxy::executeQuery(query_plan, select_stream_factory, log,
modified_query_ast, local_context, query_info);
modified_query_ast, local_context, query_info,
sharding_key_expr, sharding_key_column_name,
getCluster());
/// This is a bug, it is possible only when there is no shards to query, and this is handled earlier.
if (!query_plan.isInitialized())
@ -753,7 +755,7 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co
void StorageDistributed::startup()
{
if (remote_database.empty() && !remote_table_function_ptr)
if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication())
LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly.");
if (!storage_policy)
@ -952,7 +954,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS);
}
return cluster;
return {};
}
IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)

View File

@ -159,8 +159,7 @@ bool StorageMerge::isRemote() const
bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const
{
/// It's beneficial if it is true for at least one table.
StorageListWithLocks selected_tables = getSelectedTables(
query_context->getCurrentQueryId(), query_context->getSettingsRef());
StorageListWithLocks selected_tables = getSelectedTables(query_context);
size_t i = 0;
for (const auto & table : selected_tables)
@ -250,8 +249,7 @@ Pipe StorageMerge::read(
/** First we make list of selected tables to find out its size.
* This is necessary to correctly pass the recommended number of threads to each table.
*/
StorageListWithLocks selected_tables
= getSelectedTables(query_info, has_table_virtual_column, local_context->getCurrentQueryId(), local_context->getSettingsRef());
StorageListWithLocks selected_tables = getSelectedTables(local_context, query_info.query, has_table_virtual_column);
if (selected_tables.empty())
/// FIXME: do we support sampling in this case?
@ -427,34 +425,20 @@ Pipe StorageMerge::createSources(
return pipe;
}
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id, const Settings & settings) const
{
StorageListWithLocks selected_tables;
auto iterator = getDatabaseIterator(getContext());
while (iterator->isValid())
{
const auto & table = iterator->table();
if (table && table.get() != this)
selected_tables.emplace_back(
table, table->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name());
iterator->next();
}
return selected_tables;
}
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const
ContextPtr query_context,
const ASTPtr & query /* = nullptr */,
bool filter_by_virtual_column /* = false */) const
{
const ASTPtr & query = query_info.query;
assert(!filter_by_virtual_column || query);
const Settings & settings = query_context->getSettingsRef();
StorageListWithLocks selected_tables;
DatabaseTablesIteratorPtr iterator = getDatabaseIterator(getContext());
auto virtual_column = ColumnString::create();
MutableColumnPtr table_name_virtual_column;
if (filter_by_virtual_column)
table_name_virtual_column = ColumnString::create();
while (iterator->isValid())
{
@ -467,18 +451,20 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
if (storage.get() != this)
{
selected_tables.emplace_back(
storage, storage->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name());
virtual_column->insert(iterator->name());
auto table_lock = storage->lockForShare(query_context->getCurrentQueryId(), settings.lock_acquire_timeout);
selected_tables.emplace_back(storage, std::move(table_lock), iterator->name());
if (filter_by_virtual_column)
table_name_virtual_column->insert(iterator->name());
}
iterator->next();
}
if (has_virtual_column)
if (filter_by_virtual_column)
{
Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared<DataTypeString>(), "_table")};
VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, getContext());
/// Filter names of selected tables if there is a condition on "_table" virtual column in WHERE clause
Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(table_name_virtual_column), std::make_shared<DataTypeString>(), "_table")};
VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, query_context);
auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");
/// Remove unused tables from the list
@ -488,7 +474,6 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
return selected_tables;
}
DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(ContextPtr local_context) const
{
try

View File

@ -55,10 +55,8 @@ private:
using StorageWithLockAndName = std::tuple<StoragePtr, TableLockHolder, String>;
using StorageListWithLocks = std::list<StorageWithLockAndName>;
StorageListWithLocks getSelectedTables(const String & query_id, const Settings & settings) const;
StorageMerge::StorageListWithLocks getSelectedTables(
const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const;
ContextPtr query_context, const ASTPtr & query = nullptr, bool filter_by_virtual_column = false) const;
template <typename F>
StoragePtr getFirstTable(F && predicate) const;

View File

@ -10,7 +10,6 @@
#include <Storages/IStorage.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Storages/PostgreSQL/PostgreSQLPoolWithFailover.h>
#include <pqxx/pqxx>
namespace DB

View File

@ -5701,6 +5701,56 @@ CancellationCode StorageReplicatedMergeTree::killMutation(const String & mutatio
return CancellationCode::CancelSent;
}
void StorageReplicatedMergeTree::removePartsFromFilesystem(const DataPartsVector & parts)
{
auto remove_part = [&](const auto & part)
{
LOG_DEBUG(log, "Removing part from filesystem {}", part.name);
try
{
bool keep_s3 = !this->unlockSharedData(part);
part.remove(keep_s3);
}
catch (...)
{
tryLogCurrentException(log, "There is a problem with deleting part " + part.name + " from filesystem");
}
};
const auto settings = getSettings();
if (settings->max_part_removal_threads > 1 && parts.size() > settings->concurrent_part_removal_threshold)
{
/// Parallel parts removal.
size_t num_threads = std::min<size_t>(settings->max_part_removal_threads, parts.size());
ThreadPool pool(num_threads);
/// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
for (const DataPartPtr & part : parts)
{
pool.scheduleOrThrowOnError([&, thread_group = CurrentThread::getGroup()]
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
if (thread_group)
CurrentThread::attachTo(thread_group);
remove_part(*part);
});
}
pool.wait();
}
else
{
for (const DataPartPtr & part : parts)
{
remove_part(*part);
}
}
}
void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK()
{
@ -5726,26 +5776,10 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK()
}
parts.clear();
auto remove_parts_from_filesystem = [log=log, this] (const DataPartsVector & parts_to_remove)
{
for (const auto & part : parts_to_remove)
{
try
{
bool keep_s3 = !this->unlockSharedData(*part);
part->remove(keep_s3);
}
catch (...)
{
tryLogCurrentException(log, "There is a problem with deleting part " + part->name + " from filesystem");
}
}
};
/// Delete duplicate parts from filesystem
if (!parts_to_delete_only_from_filesystem.empty())
{
remove_parts_from_filesystem(parts_to_delete_only_from_filesystem);
removePartsFromFilesystem(parts_to_delete_only_from_filesystem);
removePartsFinally(parts_to_delete_only_from_filesystem);
LOG_DEBUG(log, "Removed {} old duplicate parts", parts_to_delete_only_from_filesystem.size());
@ -5790,7 +5824,7 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK()
/// Remove parts from filesystem and finally from data_parts
if (!parts_to_remove_from_filesystem.empty())
{
remove_parts_from_filesystem(parts_to_remove_from_filesystem);
removePartsFromFilesystem(parts_to_remove_from_filesystem);
removePartsFinally(parts_to_remove_from_filesystem);
LOG_DEBUG(log, "Removed {} old parts", parts_to_remove_from_filesystem.size());

View File

@ -408,6 +408,8 @@ private:
/// Just removes part from ZooKeeper using previous method
void removePartFromZooKeeper(const String & part_name);
void removePartsFromFilesystem(const DataPartsVector & parts);
/// Quickly removes big set of parts from ZooKeeper (using async multi queries)
void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
NameSet * parts_should_be_retried = nullptr);

View File

@ -17,8 +17,8 @@ void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_co
const auto & combinators = AggregateFunctionCombinatorFactory::instance().getAllAggregateFunctionCombinators();
for (const auto & pair : combinators)
{
res_columns[0]->insert(pair.first);
res_columns[1]->insert(pair.second->isForInternalUsageOnly());
res_columns[0]->insert(pair.name);
res_columns[1]->insert(pair.combinator_ptr->isForInternalUsageOnly());
}
}

View File

@ -191,10 +191,15 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex
ConstantFilterDescription constant_filter(*filter_column);
if (constant_filter.always_true)
{
return;
}
if (constant_filter.always_false)
{
block = block.cloneEmpty();
return;
}
FilterDescription filter(*filter_column);

View File

@ -54,6 +54,26 @@ def run_and_check(args, env=None, shell=False, stdout=subprocess.PIPE, stderr=su
raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8')))
def retry_exception(num, delay, func, exception=Exception, *args, **kwargs):
"""
Retry if `func()` throws, `num` times.
:param func: func to run
:param num: number of retries
:throws StopIteration
"""
i = 0
while i <= num:
try:
func(*args, **kwargs)
time.sleep(delay)
except exception: # pylint: disable=broad-except
i += 1
continue
return
raise StopIteration('Function did not finished successfully')
def subprocess_check_call(args):
# Uncomment for debugging
# print('run:', ' ' . join(args))
@ -632,16 +652,6 @@ class ClickHouseCluster:
if self.is_up:
return
# Just in case kill unstopped containers from previous launch
try:
print("Trying to kill unstopped containers...")
if not subprocess_call(['docker-compose', 'kill']):
subprocess_call(['docker-compose', 'down', '--volumes'])
print("Unstopped containers killed")
except:
pass
try:
if destroy_dirs and p.exists(self.instances_dir):
print(("Removing instances dir %s", self.instances_dir))
@ -651,9 +661,24 @@ class ClickHouseCluster:
print(('Setup directory for instance: {} destroy_dirs: {}'.format(instance.name, destroy_dirs)))
instance.create_dir(destroy_dir=destroy_dirs)
# In case of multiple cluster we should not stop compose services.
if destroy_dirs:
# Just in case kill unstopped containers from previous launch
try:
print("Trying to kill unstopped containers...")
subprocess_call(['docker-compose', 'kill'])
subprocess_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
print("Unstopped containers killed")
except:
pass
clickhouse_pull_cmd = self.base_cmd + ['pull']
print(f"Pulling images for {self.base_cmd}")
retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd)
self.docker_client = docker.from_env(version=self.docker_api_version)
common_opts = ['up', '-d', '--force-recreate']
common_opts = ['up', '-d']
if self.with_zookeeper and self.base_zookeeper_cmd:
print('Setup ZooKeeper')
@ -735,7 +760,7 @@ class ClickHouseCluster:
if self.with_redis and self.base_redis_cmd:
print('Setup Redis')
subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate'])
subprocess_check_call(self.base_redis_cmd + ['up', '-d'])
time.sleep(10)
if self.with_minio and self.base_minio_cmd:
@ -769,7 +794,7 @@ class ClickHouseCluster:
os.environ.pop('SSL_CERT_FILE')
if self.with_cassandra and self.base_cassandra_cmd:
subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate'])
subprocess_check_call(self.base_cassandra_cmd + ['up', '-d'])
self.wait_cassandra_to_start()
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']

View File

@ -0,0 +1,3 @@
<yandex>
<catboost_dynamic_library_path>/etc/clickhouse-server/model/libcatboostmodel.so</catboost_dynamic_library_path>
</yandex>

View File

@ -0,0 +1,2 @@
<yandex>
</yandex>

View File

@ -0,0 +1,8 @@
<models>
<model>
<type>catboost</type>
<name>model1</name>
<path>/etc/clickhouse-server/model/model.bin</path>
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,8 @@
<models>
<model>
<type>catboost</type>
<name>model2</name>
<path>/etc/clickhouse-server/model/model.bin</path>
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,58 @@
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True, main_configs=['config/models_config.xml', 'config/catboost_lib.xml'])
def copy_file_to_container(local_path, dist_path, container_id):
os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path))
config = '''<yandex>
<models_config>/etc/clickhouse-server/model/{model_config}</models_config>
</yandex>'''
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
copy_file_to_container(os.path.join(SCRIPT_DIR, 'model/.'), '/etc/clickhouse-server/model', node.docker_id)
node.restart_clickhouse()
yield cluster
finally:
cluster.shutdown()
def change_config(model_config):
node.replace_config("/etc/clickhouse-server/config.d/models_config.xml", config.format(model_config=model_config))
node.query("SYSTEM RELOAD CONFIG;")
def test(started_cluster):
# Set config with the path to the first model.
change_config("model_config.xml")
node.query("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")
# Change path to the second model in config.
change_config("model_config2.xml")
# Check that the new model is loaded.
node.query("SELECT modelEvaluate('model2', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")
# Check that the old model was unloaded.
node.query_and_get_error("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,42 @@
<yandex>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<snapshot_distance>75</snapshot_distance>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<secure>true</secure>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</keeper_server>
</yandex>

View File

@ -0,0 +1,42 @@
<yandex>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<snapshot_distance>75</snapshot_distance>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<secure>true</secure>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</keeper_server>
</yandex>

View File

@ -0,0 +1,42 @@
<yandex>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<snapshot_distance>75</snapshot_distance>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<secure>true</secure>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</keeper_server>
</yandex>

View File

@ -0,0 +1,21 @@
-----BEGIN CERTIFICATE-----
MIIDazCCAlOgAwIBAgIUUiyhAav08YhTLfUIXLN/0Ln09n4wDQYJKoZIhvcNAQEL
BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yMTA0MTIxMTQ1MjBaFw0yMTA1
MTIxMTQ1MjBaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggEiMA0GCSqGSIb3DQEB
AQUAA4IBDwAwggEKAoIBAQDK0Ww4voPlkePBPS2MsEi7e1ePS+CDxTdDuOwWWEA7
JiOyqIGqdyL6AE2EqjL3sSdVFVxytpGQWDuM6JHXdb01AnMngBuql9Jkiln7i267
v54HtMWdm8o3rik/b/mB+kkn/sP715tI49Ybh/RobtvtK16ZgHr1ombkq6rXiom2
8GmSmpYFwZtZsXtm2JwbZVayupQpWwdu3KrTXKBtVyKVvvWdgkf47DWYtWDS3vqE
cShM1H97G4DvI+4RX1WtQevQ0yCx1aFTg4xMHFkpUxlP8iW6mQaQPqy9rnI57e3L
RHc2I/B56xa43R3GmQ2S7bE4hvm1SrZDtVgrZLf4nvwNAgMBAAGjUzBRMB0GA1Ud
DgQWBBQ4+o0x1FzK7nRbcnm2pNLwaywCdzAfBgNVHSMEGDAWgBQ4+o0x1FzK7nRb
cnm2pNLwaywCdzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQDE
YmM8MH6RKcaqMqCBefWLj0LTcZ/Wm4G/eCFC51PkAIsf7thnzViemBHRXUSF8wzc
1MBPD6II6OB1F0i7ntGjtlhnL2WcPYbo2Np59p7fo9SMbYwF49OZ40twsuKeeoAp
pfow+y/EBZqa99MY2q6FU6FDA3Rpv0Sdk+/5PHdsSP6cgeMszFBUS0tCQEvEl83n
FJUb0vjEX4x3J64XO/0DKXyCxFyF77OwHG2ZV5BeCpIhGXu+d/e221LJkGI2orKR
kgsaUwrkS8HQt3Hd0gYpLI1Opx/JlRpB0VLYLzRGj7kDpbAcTj3SMEUp/FAZmlXR
Iiebt73eE3rOWVFgyY9f
-----END CERTIFICATE-----

View File

@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDETCCAfkCFHL+gKBQnU0P73/nrFrGaVPauTPmMA0GCSqGSIb3DQEBCwUAMEUx
CzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl
cm5ldCBXaWRnaXRzIFB0eSBMdGQwHhcNMjEwNDEyMTE0NzI5WhcNMjEwNTEyMTE0
NzI5WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UE
CgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOC
AQ8AMIIBCgKCAQEA1iPeYn1Vy4QnQi6uNVqQnFLr0u3qdrMjGEBNAOuGmtIdhIn8
rMCzaehNr3y2YTMRbZAqmv28P/wOXpzR1uQaFlQzTOjmsn/HOZ9JX2hv5sBUv7SU
UiPJS7UtptKDPbLv3N/v1dOXbY+vVyzo8U1Q9OS1J5yhYW6KtxP++hfSrOsFu669
d1pqWFWaNBsmf0zF+ETvi6lywhyTFA1/PazcStP5GntcDL7eDvGq+DDsRC40oRpy
S4xRQRSteCTtGGmWpx+Jmt+90wFnLgruUbWT0veCoLxLvz0tJUk3ueUVnMkrxBQG
Fz+IWm+SQppNU5LlAcBcu9wJfo3h34BXp0NFNQIDAQABMA0GCSqGSIb3DQEBCwUA
A4IBAQCUnvQsv+GsPwGnIWqH9iiFVhgDx5QbSTW94Fyqk8dcIJBzWAiCshmLBWPJ
pfy4y2nxJbzovFsd9DA49pxqqILeLjue99yma2DVKeo+XDLDN3OX5faIMTBd7AnL
0MKqW7gUSLRUZrNOvFciAY8xRezgBQQBo4mcmmMbAbk5wKndGY6ZZOcY+JwXlqGB
5hyi6ishO8ciiZi3GMFNWWk9ViSfo27IqjKdSkQq1pr3FULvepd6SkdX+NvfZTAH
rG+CSoFGiJcOBbhDkvpY32cAJEnJOA1vHpFxfnGP8/1haeVZHqSwH1cySD78HVtF
fBs000wGHzBYWNI2KkwjNtYf06P4
-----END CERTIFICATE-----

View File

@ -0,0 +1,27 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA1iPeYn1Vy4QnQi6uNVqQnFLr0u3qdrMjGEBNAOuGmtIdhIn8
rMCzaehNr3y2YTMRbZAqmv28P/wOXpzR1uQaFlQzTOjmsn/HOZ9JX2hv5sBUv7SU
UiPJS7UtptKDPbLv3N/v1dOXbY+vVyzo8U1Q9OS1J5yhYW6KtxP++hfSrOsFu669
d1pqWFWaNBsmf0zF+ETvi6lywhyTFA1/PazcStP5GntcDL7eDvGq+DDsRC40oRpy
S4xRQRSteCTtGGmWpx+Jmt+90wFnLgruUbWT0veCoLxLvz0tJUk3ueUVnMkrxBQG
Fz+IWm+SQppNU5LlAcBcu9wJfo3h34BXp0NFNQIDAQABAoIBAHYDso2o8V2F6XTp
8QxqawQcFudaQztDonW9CjMVmks8vRPMUDqMwNP/OMEcBA8xa8tsBm8Ao3zH1suB
tYuujkn8AYHDYVDCZvN0u6UfE3yiRpKYXJ2gJ1HX+d7UaYvZT6P0rmKzh+LTqxhq
Ib7Kk3FDkirQgYgGueAH3x/JfUvaAGvFrq+HvvlhHOs7M7iFU4nJA8jNfBolpTnG
v5MMI+f8/GHGreVICJUoclE+4V/4LDHUlrc3l1kQk0keeD6ECw/pl48TNL6ncXKu
baez1rfKbMPjhLUy2q5UZa93oW+olchEOXs1nUNKUhIOOr0f0YweYhUHNTineVM9
yTecMIkCgYEA7CFQMyeLVeBA6C9AHBe8Zf/k64cMPyr0lUz6548ulil580PNPbvW
kd2vIKfUMgCO5lMA47ArL4bXZ7cjTvJmPYE1Yv8z+F0Tk03fnTrudHOSBEiGXAu3
MPTxCDU7Se5Dwj0Fq81aFRtCHl8Rrss+WiBD8eRoxb/vwXKFc6VUAWMCgYEA6CjZ
XrZz11lySBhjkyVXcdLj89hDZ+bPxA7b3VB7TfCxsn5xVck7U3TFkg5Z9XwEQ7Ob
XFAPuwT9GKm7QPp6L8T2RltoJ3ys40UH1RtcNLz2aIo/xSP7lopPdAfWHef5r4y9
kRw+Gh4NP/l5wefXsRz/D0jY3+t+QnwnhuCKbocCgYEAiR6bPOlkvzyXVH1DxEyA
Sdb8b00f7nqaRyzJsrfxvJ9fQsWHpKa0ZkYOUW9ECLlMQjHHHXEK0vGBmqe9qDWY
63RhtRgvbLVYDb018k7rc9I846Hd7AudmJ9UbIjE4hyrWlsnNOntur32ej6IvTEn
Bx0fd5NEyDi6GGLRXiOOkbMCgYAressLE/yqDlR68CZl/o5cAPU0TAKDyRSMUYQX
9OTC+hstpMSxHlkADlSaQBnVAf8CdvbX2R65FfwYzGEHkGGl5KuDDcd57b2rathG
rzMbpXA4r/u1fkG2Nf0fbABL5ZA7so4mSTXQSmSM4LpO+I7K2vVh9XC4rzAcX4g/
mHoUrQKBgBf3rxp5h9P3HWoZYjzBDo2FqXUjKLLjE9ed5e/VqecqfHIkmueuNHlN
xifHr7lpsYu6IXkTnlK14pvLoPuwP59dCIOUYwAFz8RlH4MSUGNhYeGA8cqRrhmJ
tYk3OKExuN/+O12kUPVTy6BMH1hBdRJP+7y7lapWsRhZt18y+8Za
-----END RSA PRIVATE KEY-----

View File

@ -0,0 +1,15 @@
<yandex>
<openSSL>
<server>
<certificateFile>/etc/clickhouse-server/config.d/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/config.d/server.key</privateKeyFile>
<caConfig>/etc/clickhouse-server/config.d/rootCA.pem</caConfig>
<loadDefaultCAFile>true</loadDefaultCAFile>
<verificationMode>none</verificationMode>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
</openSSL>
</yandex>

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import random
import string
import os
import time
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/enable_secure_keeper1.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem'])
node2 = cluster.add_instance('node2', main_configs=['configs/enable_secure_keeper2.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem'])
node3 = cluster.add_instance('node3', main_configs=['configs/enable_secure_keeper3.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem'])
from kazoo.client import KazooClient, KazooState
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
def reset_listener(state):
nonlocal _fake_zk_instance
print("Fake zk callback called for state", state)
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
_fake_zk_instance.add_listener(reset_listener)
_fake_zk_instance.start()
return _fake_zk_instance
def test_secure_raft_works(started_cluster):
try:
node1_zk = get_fake_zk("node1")
node2_zk = get_fake_zk("node2")
node3_zk = get_fake_zk("node3")
node1_zk.create("/test_node", b"somedata1")
node2_zk.sync("/test_node")
node3_zk.sync("/test_node")
assert node1_zk.exists("/test_node") is not None
assert node2_zk.exists("/test_node") is not None
assert node3_zk.exists("/test_node") is not None
finally:
try:
for zk_conn in [node1_zk, node2_zk, node3_zk]:
zk_conn.stop()
zk_conn.close()
except:
pass

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

Some files were not shown because too many files have changed in this diff Show More