diff --git a/.gitmodules b/.gitmodules index e6cc7114ed3..f7dcf5f4ac1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -221,6 +221,9 @@ [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git +[submodule "contrib/nanodbc"] + path = contrib/nanodbc + url = https://github.com/ClickHouse-Extras/nanodbc.git [submodule "contrib/datasketches-cpp"] path = contrib/datasketches-cpp url = https://github.com/ClickHouse-Extras/datasketches-cpp.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 77c80715f35..0f895c7c482 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ #### New Feature * Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not all time and date functions work for the extended range of dates. +* Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)). * Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)). * Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)). * Added `executable_pool` dictionary source. Closes [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)). diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d385f704ee..736a6577660 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,6 +512,7 @@ include (cmake/find/fastops.cmake) include (cmake/find/odbc.cmake) include (cmake/find/rocksdb.cmake) include (cmake/find/libpqxx.cmake) +include (cmake/find/nanodbc.cmake) include (cmake/find/nuraft.cmake) diff --git a/cmake/find/nanodbc.cmake b/cmake/find/nanodbc.cmake new file mode 100644 index 00000000000..2c913abb13e --- /dev/null +++ b/cmake/find/nanodbc.cmake @@ -0,0 +1,35 @@ +option(ENABLE_NANODBC "Enable nanodbc" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_NANODBC) + set (USE_ODBC 0) + return() +endif() + +if (NOT ENABLE_ODBC) + set (USE_NANODBC 0) + message (STATUS "Using nanodbc=${USE_NANODBC}") + return() +endif() + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt") + message (WARNING "submodule contrib/nanodbc is missing. 
to fix try run: \n git submodule update --init --recursive") + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal nanodbc library") + set (USE_NANODBC 0) + return() +endif() + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/include") + message (WARNING "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive") + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal unixodbc needed for nanodbc") + set (USE_NANODBC 0) + return() +endif() + +set (USE_NANODBC 1) + +set (NANODBC_LIBRARY nanodbc) + +set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbc") + +message (STATUS "Using nanodbc=${USE_NANODBC}: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}") +message (STATUS "Using unixodbc") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8ad19b08ff1..d05177739fe 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -326,6 +326,10 @@ if (USE_LIBPQXX) add_subdirectory (libpqxx-cmake) endif() +if (USE_NANODBC) + add_subdirectory (nanodbc-cmake) +endif() + if (USE_NURAFT) add_subdirectory(nuraft-cmake) endif() diff --git a/contrib/NuRaft b/contrib/NuRaft index c35819f2c8a..d2feb5978b9 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit c35819f2c8a378d4ba88cc930c17bc20aeb875eb +Subproject commit d2feb5978b979729a07c3ca76eaa4ab94cef4ceb diff --git a/contrib/nanodbc b/contrib/nanodbc new file mode 160000 index 00000000000..9fc45967551 --- /dev/null +++ b/contrib/nanodbc @@ -0,0 +1 @@ +Subproject commit 9fc459675515d491401727ec67fca38db721f28c diff --git a/contrib/nanodbc-cmake/CMakeLists.txt b/contrib/nanodbc-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5de46d52a61 --- /dev/null +++ b/contrib/nanodbc-cmake/CMakeLists.txt @@ -0,0 +1,14 @@ +set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/nanodbc) + +if (NOT TARGET unixodbc) + message(FATAL_ERROR "Configuration error: unixodbc is not a target") +endif() + +set (SRCS + ${LIBRARY_DIR}/nanodbc/nanodbc.cpp +) + +add_library(nanodbc ${SRCS}) + +target_link_libraries (nanodbc PUBLIC unixodbc) +target_include_directories (nanodbc SYSTEM PUBLIC ${LIBRARY_DIR}/) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index c21a115289d..14c6ee0d337 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -300,6 +300,7 @@ function run_tests 01663_aes_msan # Depends on OpenSSL 01667_aes_args_check # Depends on OpenSSL 01776_decrypt_aead_size_check # Depends on OpenSSL + 01811_filter_by_null # Depends on OpenSSL 01281_unsucceeded_insert_select_queries_counter 01292_create_user 01294_lazy_database_concurrent diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index d6338642af6..b2470207dcc 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -58,8 +58,7 @@ ClickHouse artificially executes `INSERT` longer (adds ‘sleep’) so that the ## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert} -If the number of inactive parts in a single partition more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts` exception. - +If the number of inactive parts in a single partition is more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the "Too many inactive parts (N). 
Parts cleaning are processing significantly slower than inserts" exception. Possible values: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b64fd528f98..6a1b25982a0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1565,6 +1565,17 @@ Possible values: Default value: 0 +## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shards-rewrite-in} + +Rewrite IN in query for remote shards to exclude values that do not belong to the shard (requires `optimize_skip_unused_shards`). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1 (since it requires `optimize_skip_unused_shards` anyway, which is `0` by default). + ## allow_nondeterministic_optimize_skip_unused_shards {#allow-nondeterministic-optimize-skip-unused-shards} Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats with updates) functions in sharding key. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index bfc0b0a2644..f9093d379e3 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -55,6 +55,26 @@ Eсли число кусков в партиции превышает знач ClickHouse искусственно выполняет `INSERT` дольше (добавляет ‘sleep’), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются. +## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert} + +Если число неактивных кусков в партиции превышает значение `inactive_parts_to_throw_insert`, `INSERT` прерывается с исключением «Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts». + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 0 (не ограничено). + +## inactive_parts_to_delay_insert {#inactive-parts-to-delay-insert} + +Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, `INSERT` искусственно замедляется. Это полезно, когда сервер не может быстро очистить неактивные куски. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 0 (не ограничено). + ## max_delay_to_insert {#max-delay-to-insert} Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert). 
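The `optimize_skip_unused_shards_rewrite_in` setting documented above is easiest to picture as per-shard value pruning. The sketch below is illustrative only — the helper name and the modulo sharding scheme are assumptions for the example, not ClickHouse internals: for a query like `SELECT ... WHERE key IN (0, 1, 2, 3)` against a Distributed table with `sharding_key = key % shard_count`, each remote shard would only receive the IN values that actually route to it.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper (not ClickHouse code): keep only the IN-list values
// whose sharding key (here: value mod shard_count) maps to this shard.
std::vector<int64_t> pruneInValuesForShard(
    const std::vector<int64_t> & in_values, size_t shard_num, size_t shard_count)
{
    std::vector<int64_t> kept;
    for (int64_t value : in_values)
    {
        // Normalize the remainder so negative values land in [0, shard_count).
        auto count = static_cast<int64_t>(shard_count);
        auto shard = static_cast<size_t>(((value % count) + count) % count);
        if (shard == shard_num)
            kept.push_back(value); // rows with this value can only live here
    }
    return kept;
}
```

With two shards, shard 0 would receive `key IN (0, 2)` and shard 1 `key IN (1, 3)`; when a shard's pruned list comes out empty, `optimize_skip_unused_shards` can drop that shard entirely.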
diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 0948153362b..33a64f6bf26 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -15,10 +15,12 @@ - [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) - [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) - [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) -- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) - [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) +- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) +- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) !!! note "Примечание" Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index a9aa5b4f366..5ceff47ee0c 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -17,7 +17,6 @@ add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE daemon dbms - clickhouse_parsers bridge ) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index c383d09767c..7b232f2b5dc 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -26,11 +26,12 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE dbms bridge clickhouse_parsers - Poco::Data - Poco::Data::ODBC + nanodbc + unixodbc ) set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
+target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro) if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 2e0f5f87413..e33858583c2 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -2,29 +2,36 @@ #if USE_ODBC -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include "getIdentifierQuote.h" -# include "validateODBCConnectionString.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "getIdentifierQuote.h" +#include "validateODBCConnectionString.h" +#include "ODBCConnectionFactory.h" + +#include +#include -# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + namespace { DataTypePtr getDataType(SQLSMALLINT type) @@ -59,6 +66,7 @@ namespace } } + void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { HTMLForm params(request, request.getStream()); @@ -77,88 +85,79 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ process_error("No 'table' param in request URL"); return; } + if (!params.has("connection_string")) { process_error("No 'connection_string' in request URL"); return; } + std::string schema_name; std::string table_name = params.get("table"); std::string connection_string = params.get("connection_string"); if (params.has("schema")) - { schema_name = params.get("schema"); - LOG_TRACE(log, "Will fetch info for table '{}'", schema_name + "." + table_name); - } - else - LOG_TRACE(log, "Will fetch info for table '{}'", table_name); + LOG_TRACE(log, "Got connection str '{}'", connection_string); try { const bool external_table_functions_use_nulls = Poco::NumberParser::parseBool(params.get("external_table_functions_use_nulls", "false")); - POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); - SQLHDBC hdbc = session.dbc().handle(); + auto connection = ODBCConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), + getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - SQLHSTMT hstmt = nullptr; + nanodbc::catalog catalog(*connection); + std::string catalog_name; - if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLAllocStmt(hdbc, &hstmt))) - throw POCO_SQL_ODBC_CLASS::ODBCException("Could not allocate connection handle."); - - SCOPE_EXIT(SQLFreeStmt(hstmt, SQL_DROP)); - - const auto & context_settings = getContext()->getSettingsRef(); - - /// TODO Why not do SQLColumns instead? - std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." 
+ backQuoteIfNeed(table_name); - WriteBufferFromOwnString buf; - std::string input = "SELECT * FROM " + name + " WHERE 1 = 0"; - ParserQueryWithOutput parser(input.data() + input.size()); - ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth); - - IAST::FormatSettings settings(buf, true); - settings.always_quote_identifiers = true; - settings.identifier_quoting_style = getQuotingStyle(hdbc); - select->format(settings); - std::string query = buf.str(); - - LOG_TRACE(log, "Inferring structure with query '{}'", query); - - if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLPrepare(hstmt, reinterpret_cast(query.data()), query.size()))) - throw POCO_SQL_ODBC_CLASS::DescriptorException(session.dbc()); - - if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLExecute(hstmt))) - throw POCO_SQL_ODBC_CLASS::StatementException(hstmt); - - SQLSMALLINT cols = 0; - if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLNumResultCols(hstmt, &cols))) - throw POCO_SQL_ODBC_CLASS::StatementException(hstmt); - - /// TODO cols not checked - - NamesAndTypesList columns; - for (SQLSMALLINT ncol = 1; ncol <= cols; ++ncol) + /// In XDBC tables it is allowed to pass either database_name or schema_name in table definition, but not both of them. + /// They both are passed as 'schema' parameter in request URL, so it is not clear whether it is database_name or schema_name that is passed. + /// If it is schema_name then we know that database is added in odbc.ini. But if we have database_name as 'schema', + /// it is not guaranteed. For nanodbc database_name must be either in odbc.ini or passed as catalog_name. + auto get_columns = [&]() { - SQLSMALLINT type = 0; - /// TODO Why 301? - SQLCHAR column_name[301]; - - SQLSMALLINT is_nullable; - const auto result = POCO_SQL_ODBC_CLASS::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, &is_nullable); - if (POCO_SQL_ODBC_CLASS::Utility::isError(result)) - throw POCO_SQL_ODBC_CLASS::StatementException(hstmt); - - auto column_type = getDataType(type); - if (external_table_functions_use_nulls && is_nullable == SQL_NULLABLE) + nanodbc::catalog::tables tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ "", /* catalog = */ schema_name); + if (tables.next()) { - column_type = std::make_shared(column_type); + catalog_name = tables.table_catalog(); + LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name); + return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name); } - columns.emplace_back(reinterpret_cast(column_name), std::move(column_type)); + tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ schema_name); + if (tables.next()) + { + catalog_name = tables.table_catalog(); + LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name); + return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} not found", schema_name.empty() ? table_name : schema_name + '.' 
+ table_name); + }; + + nanodbc::catalog::columns columns_definition = get_columns(); + + NamesAndTypesList columns; + while (columns_definition.next()) + { + SQLSMALLINT type = columns_definition.sql_data_type(); + std::string column_name = columns_definition.column_name(); + + bool is_nullable = columns_definition.nullable() == SQL_NULLABLE; + + auto column_type = getDataType(type); + + if (external_table_functions_use_nulls && is_nullable) + column_type = std::make_shared(column_type); + + columns.emplace_back(column_name, std::move(column_type)); } + if (columns.empty()) + throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try { diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index b4143c289a8..bc976f54aee 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -2,16 +2,13 @@ #if USE_ODBC -# include -# include -# include +#include +#include +#include +#include +#include -# include -/** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0". - * TODO: It would be much better to utilize ODBC methods dedicated for columns description. - * If there is no such table, an exception is thrown. - */ namespace DB { @@ -19,7 +16,9 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext { public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_), log(&Poco::Logger::get("ODBCColumnsInfoHandler")), keep_alive_timeout(keep_alive_timeout_) + : WithContext(context_) + , log(&Poco::Logger::get("ODBCColumnsInfoHandler")) + , keep_alive_timeout(keep_alive_timeout_) { } diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index 38ddd75d553..49984453d33 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -38,9 +38,9 @@ std::unique_ptr ODBCBridgeHandlerFactory::createRequestHandl return nullptr; #endif else if (uri.getPath() == "/write") - return std::make_unique(pool_map, keep_alive_timeout, getContext(), "write"); + return std::make_unique(keep_alive_timeout, getContext(), "write"); else - return std::make_unique(pool_map, keep_alive_timeout, getContext(), "read"); + return std::make_unique(keep_alive_timeout, getContext(), "read"); } return nullptr; } diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 59ec88e053c..ffbbe3670af 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -6,14 +6,8 @@ #include "IdentifierQuoteHandler.h" #include "MainHandler.h" #include "SchemaAllowedHandler.h" - #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#pragma GCC diagnostic pop - namespace DB { @@ -24,9 +18,11 @@ class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext { public: ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_), log(&Poco::Logger::get(name_)), name(name_), keep_alive_timeout(keep_alive_timeout_) + : WithContext(context_) + , log(&Poco::Logger::get(name_)) + , name(name_) + , keep_alive_timeout(keep_alive_timeout_) { - pool_map = std::make_shared(); } std::unique_ptr createRequestHandler(const HTTPServerRequest & 
request) override; @@ -35,7 +31,6 @@ private: Poco::Logger * log; std::string name; size_t keep_alive_timeout; - std::shared_ptr pool_map; }; } diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 5060d37c479..a5a97cb8086 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -2,23 +2,20 @@ #if USE_ODBC -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include "getIdentifierQuote.h" -# include "validateODBCConnectionString.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "getIdentifierQuote.h" +#include "validateODBCConnectionString.h" +#include "ODBCConnectionFactory.h" -# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC namespace DB { @@ -44,10 +41,12 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ try { std::string connection_string = params.get("connection_string"); - POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); - SQLHDBC hdbc = session.dbc().handle(); - auto identifier = getIdentifierQuote(hdbc); + auto connection = ODBCConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), + getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + + auto identifier = getIdentifierQuote(*connection); WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index d851abc83ec..ef3806fd802 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -11,11 +11,13 @@ namespace DB { -class IdentifierQuoteHandler : public HTTPRequestHandler +class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext { public: - IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr) - : log(&Poco::Logger::get("IdentifierQuoteHandler")), keep_alive_timeout(keep_alive_timeout_) + IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_) + : WithContext(context_) + , log(&Poco::Logger::get("IdentifierQuoteHandler")) + , keep_alive_timeout(keep_alive_timeout_) { } diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 679dfa70c45..24bcaf63c69 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -18,18 +18,17 @@ #include #include #include +#include "ODBCConnectionFactory.h" #include #include +#include -#if USE_ODBC -#include -#define POCO_SQL_ODBC_CLASS Poco::Data::ODBC -#endif namespace DB { + namespace { std::unique_ptr parseColumns(std::string && column_string) @@ -42,37 +41,6 @@ namespace } } -using PocoSessionPoolConstructor = std::function()>; -/** Is used to adjust max size of default Poco thread pool. See issue #750 - * Acquire the lock, resize pool and construct new Session. 
- */ -static std::shared_ptr createAndCheckResizePocoSessionPool(PocoSessionPoolConstructor pool_constr) -{ - static std::mutex mutex; - - Poco::ThreadPool & pool = Poco::ThreadPool::defaultPool(); - - /// NOTE: The lock don't guarantee that external users of the pool don't change its capacity - std::unique_lock lock(mutex); - - if (pool.available() == 0) - pool.addCapacity(2 * std::max(pool.capacity(), 1)); - - return pool_constr(); -} - -ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str) -{ - std::lock_guard lock(mutex); - if (!pool_map->count(connection_str)) - { - pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str] - { - return std::make_shared("ODBC", validateODBCConnectionString(connection_str)); - })); - } - return pool_map->at(connection_str); -} void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message) { @@ -82,6 +50,7 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string LOG_WARNING(log, message); } + void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { HTMLForm params(request); @@ -141,6 +110,10 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse try { + auto connection = ODBCConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), + getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + if (mode == "write") { if (!params.has("db_name")) @@ -159,15 +132,12 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse auto quoting_style = IdentifierQuotingStyle::None; #if USE_ODBC - POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); - quoting_style = getQuotingStyle(session.dbc().handle()); + quoting_style = getQuotingStyle(*connection); #endif - - auto pool = getPool(connection_string); auto & read_buf = request.getStream(); auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, getContext(), max_block_size); auto input_stream = std::make_shared(input_format); - ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style); + ODBCBlockOutputStream output_stream(*connection, db_name, table_name, *sample_block, getContext(), quoting_style); copyData(*input_stream, output_stream); writeStringBinary("Ok.", out); } @@ -176,10 +146,8 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse std::string query = params.get("query"); LOG_TRACE(log, "Query: {}", query); - BlockOutputStreamPtr writer - = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext()); - auto pool = getPool(connection_string); - ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size); + BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext()); + ODBCBlockInputStream inp(*connection, query, *sample_block, max_block_size); copyData(inp, *writer); } } diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index da6cb5c51b6..bc0fca8b9a5 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -2,13 +2,8 @@ #include #include - #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#pragma GCC diagnostic pop #include #include @@ -24,16 +19,12 @@ 
namespace DB class ODBCHandler : public HTTPRequestHandler, WithContext { public: - using PoolPtr = std::shared_ptr; - using PoolMap = std::unordered_map; - - ODBCHandler(std::shared_ptr pool_map_, + ODBCHandler( size_t keep_alive_timeout_, ContextPtr context_, const String & mode_) : WithContext(context_) , log(&Poco::Logger::get("ODBCHandler")) - , pool_map(pool_map_) , keep_alive_timeout(keep_alive_timeout_) , mode(mode_) { @@ -44,13 +35,11 @@ public: private: Poco::Logger * log; - std::shared_ptr pool_map; size_t keep_alive_timeout; String mode; static inline std::mutex mutex; - PoolPtr getPool(const std::string & connection_str); void processError(HTTPServerResponse & response, const std::string & message); }; diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index b8a4209ac94..3a73cb9f601 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -1,5 +1,7 @@ #include "ODBCBlockInputStream.h" #include +#include +#include #include #include #include @@ -14,137 +16,143 @@ namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int UNKNOWN_TYPE; } ODBCBlockInputStream::ODBCBlockInputStream( - Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) - : session{session_} - , statement{(this->session << query_str, Poco::Data::Keywords::now)} - , result{statement} - , iterator{result.begin()} + nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) + : log(&Poco::Logger::get("ODBCBlockInputStream")) , max_block_size{max_block_size_} - , log(&Poco::Logger::get("ODBCBlockInputStream")) + , connection(connection_) + , query(query_str) { - if (sample_block.columns() != result.columnCount()) - throw Exception{"RecordSet contains " + toString(result.columnCount()) + " columns while " + toString(sample_block.columns()) - + " expected", - ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; - description.init(sample_block); -} - - -namespace -{ - using ValueType = ExternalResultDescription::ValueType; - - void insertValue(IColumn & column, const ValueType type, const Poco::Dynamic::Var & value) - { - switch (type) - { - case ValueType::vtUInt8: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtUInt16: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtUInt32: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtUInt64: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtInt8: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtInt16: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtInt32: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtInt64: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtFloat32: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtFloat64: - assert_cast(column).insertValue(value.convert()); - break; - case ValueType::vtString: - assert_cast(column).insert(value.convert()); - break; - case ValueType::vtDate: - { - Poco::DateTime date = value.convert(); - assert_cast(column).insertValue(UInt16{LocalDate(date.year(), date.month(), date.day()).getDayNum()}); - break; - } - case ValueType::vtDateTime: - { - Poco::DateTime datetime = 
value.convert(); - assert_cast(column).insertValue(DateLUT::instance().makeDateTime( - datetime.year(), datetime.month(), datetime.day(), datetime.hour(), datetime.minute(), datetime.second())); - break; - } - case ValueType::vtUUID: - assert_cast(column).insert(parse(value.convert())); - break; - default: - throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); - } - } - - void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } + description.init(sample_block); + result = execute(connection, NANODBC_TEXT(query)); } Block ODBCBlockInputStream::readImpl() { - if (iterator == result.end()) - return {}; - - MutableColumns columns(description.sample_block.columns()); - for (const auto i : ext::range(0, columns.size())) - columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty(); + if (finished) + return Block(); + MutableColumns columns(description.sample_block.cloneEmptyColumns()); size_t num_rows = 0; - while (iterator != result.end()) + + while (true) { - Poco::Data::Row & row = *iterator; - - for (const auto idx : ext::range(0, row.fieldCount())) + if (!result.next()) { - /// TODO This is extremely slow. - const Poco::Dynamic::Var & value = row[idx]; + finished = true; + break; + } - if (!value.isEmpty()) + for (int idx = 0; idx < result.columns(); ++idx) + { + const auto & sample = description.sample_block.getByPosition(idx); + + if (!result.is_null(idx)) { - if (description.types[idx].second) + bool is_nullable = description.types[idx].second; + + if (is_nullable) { ColumnNullable & column_nullable = assert_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + const auto & data_type = assert_cast(*sample.type); + insertValue(column_nullable.getNestedColumn(), data_type.getNestedType(), description.types[idx].first, result, idx); column_nullable.getNullMapData().emplace_back(0); } else - insertValue(*columns[idx], description.types[idx].first, value); + { + insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx); + } } else - insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column); + insertDefaultValue(*columns[idx], *sample.column); } - ++iterator; - - ++num_rows; - if (num_rows == max_block_size) + if (++num_rows == max_block_size) break; } return description.sample_block.cloneWithColumns(std::move(columns)); } + +void ODBCBlockInputStream::insertValue( + IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx) +{ + switch (type) + { + case ValueType::vtUInt8: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtUInt16: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtUInt32: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtUInt64: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtInt8: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtInt16: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtInt32: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtInt64: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtFloat32: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtFloat64: + assert_cast(column).insertValue(row.get(idx)); + break; + case ValueType::vtFixedString: [[fallthrough]]; + case ValueType::vtString: + 
assert_cast(column).insert(row.get(idx)); + break; + case ValueType::vtUUID: + { + auto value = row.get(idx); + assert_cast(column).insert(parse(value.data(), value.size())); + break; + } + case ValueType::vtDate: + assert_cast(column).insertValue(UInt16{LocalDate{row.get(idx)}.getDayNum()}); + break; + case ValueType::vtDateTime: + { + auto value = row.get(idx); + ReadBufferFromString in(value); + time_t time = 0; + readDateTimeText(time, in); + if (time < 0) + time = 0; + assert_cast(column).insertValue(time); + break; + } + case ValueType::vtDateTime64:[[fallthrough]]; + case ValueType::vtDecimal32: [[fallthrough]]; + case ValueType::vtDecimal64: [[fallthrough]]; + case ValueType::vtDecimal128: [[fallthrough]]; + case ValueType::vtDecimal256: + { + auto value = row.get(idx); + ReadBufferFromString istr(value); + data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{}); + break; + } + default: + throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); + } +} + } diff --git a/programs/odbc-bridge/ODBCBlockInputStream.h b/programs/odbc-bridge/ODBCBlockInputStream.h index 13491e05822..bbd90ce4d6c 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.h +++ b/programs/odbc-bridge/ODBCBlockInputStream.h @@ -3,10 +3,8 @@ #include #include #include -#include -#include -#include #include +#include namespace DB @@ -15,25 +13,33 @@ namespace DB class ODBCBlockInputStream final : public IBlockInputStream { public: - ODBCBlockInputStream( - Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); + ODBCBlockInputStream(nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); String getName() const override { return "ODBC"; } Block getHeader() const override { return description.sample_block.cloneEmpty(); } private: + using QueryResult = std::shared_ptr; + using ValueType = ExternalResultDescription::ValueType; + Block readImpl() override; - Poco::Data::Session session; - Poco::Data::Statement statement; - Poco::Data::RecordSet result; - Poco::Data::RecordSet::Iterator iterator; + static void insertValue(IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx); + static void insertDefaultValue(IColumn & column, const IColumn & sample_column) + { + column.insertFrom(sample_column, 0); + } + + Poco::Logger * log; const UInt64 max_block_size; ExternalResultDescription description; - Poco::Logger * log; + nanodbc::connection & connection; + nanodbc::result result; + String query; + bool finished = false; }; } diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index db3c9441419..e4614204178 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -8,16 +8,14 @@ #include #include #include "getIdentifierQuote.h" +#include +#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_TYPE; -} - namespace { using ValueType = ExternalResultDescription::ValueType; @@ -40,69 +38,21 @@ namespace return buf.str(); } - std::string getQuestionMarks(size_t n) - { - std::string result = "("; - for (size_t i = 0; i < n; ++i) - { - if (i > 0) - result += ","; - result += "?"; - } - return result + ")"; - } - - Poco::Dynamic::Var getVarFromField(const Field & field, const ValueType type) - { - switch (type) - { - case ValueType::vtUInt8: - return 
Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtUInt16: - return Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtUInt32: - return Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtUInt64: - return Poco::Dynamic::Var(field.get()).convert(); - case ValueType::vtInt8: - return Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtInt16: - return Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtInt32: - return Poco::Dynamic::Var(static_cast(field.get())).convert(); - case ValueType::vtInt64: - return Poco::Dynamic::Var(field.get()).convert(); - case ValueType::vtFloat32: - return Poco::Dynamic::Var(field.get()).convert(); - case ValueType::vtFloat64: - return Poco::Dynamic::Var(field.get()).convert(); - case ValueType::vtString: - return Poco::Dynamic::Var(field.get()).convert(); - case ValueType::vtDate: - return Poco::Dynamic::Var(LocalDate(DayNum(field.get())).toString()).convert(); - case ValueType::vtDateTime: - return Poco::Dynamic::Var(DateLUT::instance().timeToString(time_t(field.get()))).convert(); - case ValueType::vtUUID: - return Poco::Dynamic::Var(UUID(field.get()).toUnderType().toHexString()).convert(); - default: - throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); - - } - __builtin_unreachable(); - } } -ODBCBlockOutputStream::ODBCBlockOutputStream(Poco::Data::Session && session_, +ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::connection & connection_, const std::string & remote_database_name_, const std::string & remote_table_name_, const Block & sample_block_, + ContextPtr local_context_, IdentifierQuotingStyle quoting_) - : session(session_) + : log(&Poco::Logger::get("ODBCBlockOutputStream")) + , connection(connection_) , db_name(remote_database_name_) , table_name(remote_table_name_) , sample_block(sample_block_) + , local_context(local_context_) , quoting(quoting_) - , log(&Poco::Logger::get("ODBCBlockOutputStream")) { description.init(sample_block); } @@ -114,28 +64,12 @@ Block ODBCBlockOutputStream::getHeader() const void ODBCBlockOutputStream::write(const Block & block) { - ColumnsWithTypeAndName columns; - for (size_t i = 0; i < block.columns(); ++i) - columns.push_back({block.getColumns()[i], sample_block.getDataTypes()[i], sample_block.getNames()[i]}); + WriteBufferFromOwnString values_buf; + auto writer = FormatFactory::instance().getOutputStream("Values", values_buf, sample_block, local_context); + writer->write(block); - std::vector row_to_insert(block.columns()); - Poco::Data::Statement statement(session << getInsertQuery(db_name, table_name, columns, quoting) + getQuestionMarks(block.columns())); - for (size_t i = 0; i < block.columns(); ++i) - statement.addBind(Poco::Data::Keywords::use(row_to_insert[i])); - - for (size_t i = 0; i < block.rows(); ++i) - { - for (size_t col_idx = 0; col_idx < block.columns(); ++col_idx) - { - Field val; - columns[col_idx].column->get(i, val); - if (val.isNull()) - row_to_insert[col_idx] = Poco::Dynamic::Var(); - else - row_to_insert[col_idx] = getVarFromField(val, description.types[col_idx].first); - } - statement.execute(); - } + std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str(); + execute(connection, query); } } diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h index 39e1d6f77ac..0b13f7039b5 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.h 
+++ b/programs/odbc-bridge/ODBCBlockOutputStream.h @@ -2,30 +2,41 @@ #include #include -#include #include #include +#include +#include + namespace DB { + class ODBCBlockOutputStream : public IBlockOutputStream { + public: - ODBCBlockOutputStream(Poco::Data::Session && session_, const std::string & remote_database_name_, - const std::string & remote_table_name_, const Block & sample_block_, IdentifierQuotingStyle quoting); + ODBCBlockOutputStream( + nanodbc::connection & connection_, + const std::string & remote_database_name_, + const std::string & remote_table_name_, + const Block & sample_block_, + ContextPtr local_context_, + IdentifierQuotingStyle quoting); Block getHeader() const override; void write(const Block & block) override; private: - Poco::Data::Session session; + Poco::Logger * log; + + nanodbc::connection & connection; std::string db_name; std::string table_name; Block sample_block; + ContextPtr local_context; IdentifierQuotingStyle quoting; ExternalResultDescription description; - Poco::Logger * log; }; } diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h new file mode 100644 index 00000000000..958cf03cfce --- /dev/null +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace nanodbc +{ + +static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100; + +using ConnectionPtr = std::shared_ptr; +using Pool = BorrowedObjectPool; +using PoolPtr = std::shared_ptr; + +class ConnectionHolder +{ + +public: + ConnectionHolder(const std::string & connection_string_, PoolPtr pool_) : connection_string(connection_string_), pool(pool_) {} + + ~ConnectionHolder() + { + if (connection) + pool->returnObject(std::move(connection)); + } + + nanodbc::connection & operator*() + { + if (!connection) + { + pool->borrowObject(connection, [&]() + { + return std::make_shared(connection_string, ODBC_CONNECT_TIMEOUT); + }); + } + + return *connection; + } + +private: + std::string connection_string; + PoolPtr pool; + ConnectionPtr connection; +}; + +} + + +namespace DB +{ + +class ODBCConnectionFactory final : private boost::noncopyable +{ +public: + static ODBCConnectionFactory & instance() + { + static ODBCConnectionFactory ret; + return ret; + } + + nanodbc::ConnectionHolder get(const std::string & connection_string, size_t pool_size) + { + std::lock_guard lock(mutex); + + if (!factory.count(connection_string)) + factory.emplace(std::make_pair(connection_string, std::make_shared(pool_size))); + + return nanodbc::ConnectionHolder(connection_string, factory[connection_string]); + } + +private: + /// [connection_settings_string] -> [connection_pool] + using PoolFactory = std::unordered_map; + PoolFactory factory; + std::mutex mutex; +}; + +} diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index d4a70db61f4..4cceaee962c 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,33 +2,26 @@ #if USE_ODBC -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include "validateODBCConnectionString.h" +#include +#include +#include +#include +#include +#include +#include "validateODBCConnectionString.h" +#include "ODBCConnectionFactory.h" +#include +#include -# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC namespace DB { namespace { - bool isSchemaAllowed(SQLHDBC hdbc) + bool isSchemaAllowed(nanodbc::connection 
& connection) { - SQLUINTEGER value; - SQLSMALLINT value_length = sizeof(value); - SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_SCHEMA_USAGE, &value, sizeof(value), &value_length); - - if (POCO_SQL_ODBC_CLASS::Utility::isError(r)) - throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc); - - return value != 0; + uint32_t result = connection.get_info(SQL_SCHEMA_USAGE); + return result != 0; } } @@ -55,10 +48,12 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer try { std::string connection_string = params.get("connection_string"); - POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); - SQLHDBC hdbc = session.dbc().handle(); - bool result = isSchemaAllowed(hdbc); + auto connection = ODBCConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), + getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + + bool result = isSchemaAllowed(*connection); WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index eb110f15bda..d7b922ed05b 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -1,22 +1,25 @@ #pragma once +#include #include - #include #if USE_ODBC + namespace DB { class Context; /// This handler establishes connection to database, and retrieves whether schema is allowed. -class SchemaAllowedHandler : public HTTPRequestHandler +class SchemaAllowedHandler : public HTTPRequestHandler, WithContext { public: - SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr) - : log(&Poco::Logger::get("SchemaAllowedHandler")), keep_alive_timeout(keep_alive_timeout_) + SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_) + : WithContext(context_) + , log(&Poco::Logger::get("SchemaAllowedHandler")) + , keep_alive_timeout(keep_alive_timeout_) { } diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp index 15b3749d37d..d16d2a9eea0 100644 --- a/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/programs/odbc-bridge/getIdentifierQuote.cpp @@ -2,11 +2,10 @@ #if USE_ODBC -# include -# include -# include - -# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC +#include +#include +#include +#include namespace DB @@ -17,33 +16,16 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -std::string getIdentifierQuote(SQLHDBC hdbc) + +std::string getIdentifierQuote(nanodbc::connection & connection) { - std::string identifier; - - SQLSMALLINT t; - SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_IDENTIFIER_QUOTE_CHAR, nullptr, 0, &t); - - if (POCO_SQL_ODBC_CLASS::Utility::isError(r)) - throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc); - - if (t > 0) - { - // I have no idea, why to add '2' here, got from: contrib/poco/Data/ODBC/src/ODBCStatementImpl.cpp:60 (SQL_DRIVER_NAME) - identifier.resize(static_cast(t) + 2); - - if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLGetInfo( - hdbc, SQL_IDENTIFIER_QUOTE_CHAR, &identifier[0], SQLSMALLINT((identifier.length() - 1) * sizeof(identifier[0])), &t))) - throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc); - - identifier.resize(static_cast(t)); - } - return identifier; + return connection.get_info(SQL_IDENTIFIER_QUOTE_CHAR); } -IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc) + 
+IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection) { - auto identifier_quote = getIdentifierQuote(hdbc); + auto identifier_quote = getIdentifierQuote(connection); if (identifier_quote.length() == 0) return IdentifierQuotingStyle::None; else if (identifier_quote[0] == '`') diff --git a/programs/odbc-bridge/getIdentifierQuote.h b/programs/odbc-bridge/getIdentifierQuote.h index 0fb4c3bddb1..7f7156eff82 100644 --- a/programs/odbc-bridge/getIdentifierQuote.h +++ b/programs/odbc-bridge/getIdentifierQuote.h @@ -2,20 +2,19 @@ #if USE_ODBC -# include -# include -# include - -# include - +#include +#include +#include #include +#include + namespace DB { -std::string getIdentifierQuote(SQLHDBC hdbc); +std::string getIdentifierQuote(nanodbc::connection & connection); -IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc); +IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6a22772ff82..8a96612721d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -750,6 +750,7 @@ int Server::main(const std::vector & /*args*/) global_context->setClustersConfig(config); global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); + global_context->setExternalModelsConfig(config); /// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default) if (config->has("max_table_size_to_drop")) @@ -878,10 +879,30 @@ int Server::main(const std::vector & /*args*/) servers_to_start_before_tables->emplace_back( port_name, std::make_unique( - new KeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); }); + + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port) + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers_to_start_before_tables->emplace_back( + secure_port_name, + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); + LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); +#else + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); } #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. 
Cannot use internal coordination."); @@ -1302,7 +1323,7 @@ int Server::main(const std::vector & /*args*/) } /// try to load dictionaries immediately, throw on error and die - ext::scope_guard dictionaries_xmls, models_xmls; + ext::scope_guard dictionaries_xmls; try { if (!config().getBool("dictionaries_lazy_load", true)) @@ -1312,8 +1333,6 @@ } dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository( std::make_unique(config(), "dictionaries_config")); - models_xmls = global_context->getExternalModelsLoader().addConfigRepository( - std::make_unique(config(), "models_config")); } catch (...) { diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index 59c0c895a89..eb677debb02 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -31,6 +32,23 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const P return isSameConfiguration(left, key, right, key); } +bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & root, const String & name) +{ + if (&left == &right) + return true; + + auto left_multiple_keys = getMultipleKeysFromConfig(left, root, name); + auto right_multiple_keys = getMultipleKeysFromConfig(right, root, name); + if (left_multiple_keys.size() != right_multiple_keys.size()) + return false; + + for (auto & key : left_multiple_keys) + if (!isSameConfiguration(left, right, concatKeyAndSubKey(root, key))) + return false; + + return true; +} + bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key, const Poco::Util::AbstractConfiguration & right, const String & right_key) { diff --git a/src/Common/Config/AbstractConfigurationComparison.h b/src/Common/Config/AbstractConfigurationComparison.h index 795fca2af8e..6e1d8a890bb 100644 --- a/src/Common/Config/AbstractConfigurationComparison.h +++ b/src/Common/Config/AbstractConfigurationComparison.h @@ -13,6 +13,17 @@ namespace DB bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right); + /// Config may have multiple keys with one name. For example: + /// <root> + ///     <name>...</name> + ///     <name>...</name> + /// </root> + /// Returns true if the specified subview of the two configurations contains + /// the same keys and values for each key with the given name. + bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, + const Poco::Util::AbstractConfiguration & right, + const String & root, const String & name); + /// Returns true if the specified subview of the two configurations contains the same keys and values. 
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 478b3aa3d17..7827a25afdd 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1,4 +1,9 @@ #include + +#if !defined(ARCADIA_BUILD) +# include "config_core.h" +#endif + #include #include #include @@ -9,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -16,6 +22,42 @@ namespace DB namespace ErrorCodes { extern const int RAFT_ERROR; + extern const int NO_ELEMENTS_IN_CONFIG; + extern const int SUPPORT_IS_DISABLED; +} + +namespace +{ + +#if USE_SSL +void setSSLParams(nuraft::asio_service::options & asio_opts) +{ + const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config(); + String certificate_file_property = "openSSL.server.certificateFile"; + String private_key_file_property = "openSSL.server.privateKeyFile"; + String root_ca_file_property = "openSSL.server.caConfig"; + + if (!config.has(certificate_file_property)) + throw Exception("Server certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + if (!config.has(private_key_file_property)) + throw Exception("Server private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + asio_opts.enable_ssl_ = true; + asio_opts.server_cert_file_ = config.getString(certificate_file_property); + asio_opts.server_key_file_ = config.getString(private_key_file_property); + + if (config.has(root_ca_file_property)) + asio_opts.root_cert_file_ = config.getString(root_ca_file_property); + + if (config.getBool("openSSL.server.loadDefaultCAFile", false)) + asio_opts.load_default_ca_file_ = true; + + if (config.getString("openSSL.server.verificationMode", "none") == "none") + asio_opts.skip_verification_ = true; +} +#endif + } KeeperServer::KeeperServer( @@ -72,6 +114,15 @@ void KeeperServer::startup() params.return_method_ = nuraft::raft_params::blocking; nuraft::asio_service::options asio_opts{}; + if (state_manager->isSecure()) + { +#if USE_SSL + setSSLParams(asio_opts); +#else + throw Exception{"SSL support for NuRaft is disabled because ClickHouse was built without SSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } launchRaftServer(params, asio_opts); diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index ffe81cebfab..e57ae7e7c19 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -12,6 +12,7 @@ namespace ErrorCodes KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path) : my_server_id(server_id_) , my_port(port) + , secure(false) , log_store(nuraft::cs_new(logs_path, 5000, false)) , cluster_config(nuraft::cs_new()) { @@ -25,6 +26,7 @@ KeeperStateManager::KeeperStateManager( const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) : my_server_id(my_server_id_) + , secure(config.getBool(config_prefix + ".raft_configuration.secure", false)) , log_store(nuraft::cs_new( config.getString(config_prefix + ".log_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/logs"), coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync)) @@ -37,6 +39,9 @@ KeeperStateManager::KeeperStateManager( for (const auto & server_key : keys) { + if (!startsWith(server_key, "server")) + continue; + 
std::string full_prefix = config_prefix + ".raft_configuration." + server_key; int server_id = config.getInt(full_prefix + ".id"); std::string hostname = config.getString(full_prefix + ".hostname"); @@ -44,6 +49,7 @@ KeeperStateManager::KeeperStateManager( bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true); int32_t priority = config.getInt(full_prefix + ".priority", 1); bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false); + if (start_as_follower) start_as_follower_servers.insert(server_id); @@ -57,6 +63,7 @@ KeeperStateManager::KeeperStateManager( cluster_config->get_servers().push_back(peer_config); } + if (!my_server_config) throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id); diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 708909e36f1..cb5181760cb 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -52,6 +52,11 @@ public: return start_as_follower_servers.count(my_server_id); } + bool isSecure() const + { + return secure; + } + nuraft::ptr getLogStore() const { return log_store; } uint64_t getTotalServers() const { return total_servers; } @@ -59,6 +64,7 @@ public: private: int my_server_id; int my_port; + bool secure; uint64_t total_servers{0}; std::unordered_set start_as_follower_servers; nuraft::ptr log_store; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e5f34abbcd9..d31073ae932 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -118,6 +118,7 @@ class IColumn; M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \ M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \ M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. 
Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ + M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that do not belong to the shard (requires optimize_skip_unused_shards)", 0) \ M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \ M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw).", 0) \ M(UInt64, optimize_skip_unused_shards_nesting, 0, "Same as optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \ @@ -228,7 +229,7 @@ class IColumn; M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \ M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \ M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \ - M(UInt64, http_max_uri_size, 16384, "Maximum URI length of HTTP request", 0) \ + M(UInt64, http_max_uri_size, 1048576, "Maximum URI length of HTTP request", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \ @@ -372,6 +373,7 @@ class IColumn; M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ M(Int64, postgresql_connection_pool_wait_timeout, -1, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \ + M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \ \ M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \ M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \ diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 2239bae987f..293710fc190 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -480,7 +480,7 @@ public: // since right now LUT does not support Int64 values and has no format instructions for subsecond parts, // treat DateTime64 values just as DateTime values by ignoring fractional and casting to UInt32.
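// E.g. with scale = 3 the raw DateTime64 value 1609459200123 splits into whole = 1609459200 (2021-01-01 00:00:00 UTC) and fractional = 123; only the whole part is formatted.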
const auto c = DecimalUtils::split(vec[i], scale); - instruction.perform(pos, static_cast(c.whole), time_zone); + instruction.perform(pos, static_cast(c.whole), time_zone); } } else diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 369237f329d..cf25b819e6c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -773,7 +773,8 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re while (!buf.eof() && isNumericASCII(*buf.position())) ++buf.position(); } - else if (scale && (whole >= 1000000000LL * scale)) + /// 9908870400 is the time_t value for 2284-01-01 UTC (a bit over the last year supported by DateTime64) + else if (whole >= 9908870400LL) { /// Unix timestamp with subsecond precision, already scaled to integer. /// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future. diff --git a/src/Interpreters/CatBoostModel.cpp b/src/Interpreters/CatBoostModel.cpp index 3e4329c6080..e19258540b9 100644 --- a/src/Interpreters/CatBoostModel.cpp +++ b/src/Interpreters/CatBoostModel.cpp @@ -480,20 +480,15 @@ void CatBoostLibHolder::initAPI() std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) { - static std::weak_ptr ptr; + static std::shared_ptr ptr; static std::mutex mutex; std::lock_guard lock(mutex); - auto result = ptr.lock(); - if (!result || result->getCurrentPath() != lib_path) - { - result = std::make_shared(lib_path); - /// This assignment is not atomic, which prevents from creating lock only inside 'if'. - ptr = result; - } + if (!ptr || ptr->getCurrentPath() != lib_path) + ptr = std::make_shared(lib_path); - return result; + return ptr; } } diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9c19094b061..bac688fe81e 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -292,7 +292,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf std::lock_guard lock(mutex); - /// If old congig is set, remove deleted clusters from impl, otherwise just clear it. + /// If old config is set, remove deleted clusters from impl, otherwise just clear it.
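+ /// (`deleted_keys` holds the names of clusters that were present in the old config but are absent from the new one.)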
if (old_config) { for (const auto & key : deleted_keys) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 3837cc6726a..5976074ec7a 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -55,6 +55,8 @@ public: static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit); public: + using SlotToShard = std::vector; + struct Address { /** In configuration file, @@ -232,7 +234,6 @@ public: bool maybeCrossReplication() const; private: - using SlotToShard = std::vector; SlotToShard slot_to_shard; public: diff --git a/src/Interpreters/ClusterProxy/IStreamFactory.h b/src/Interpreters/ClusterProxy/IStreamFactory.h index ccd06c78985..f66eee93e0a 100644 --- a/src/Interpreters/ClusterProxy/IStreamFactory.h +++ b/src/Interpreters/ClusterProxy/IStreamFactory.h @@ -30,7 +30,7 @@ public: virtual void createForShard( const Cluster::ShardInfo & shard_info, - const String & query, const ASTPtr & query_ast, + const ASTPtr & query_ast, ContextPtr context, const ThrottlerPtr & throttler, const SelectQueryInfo & query_info, std::vector & res, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 07d554e4dc8..7cb55f32162 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -115,7 +115,7 @@ String formattedAST(const ASTPtr & ast) void SelectStreamFactory::createForShard( const Cluster::ShardInfo & shard_info, - const String &, const ASTPtr & query_ast, + const ASTPtr & query_ast, ContextPtr context, const ThrottlerPtr & throttler, const SelectQueryInfo &, std::vector & plans, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 5f414bc7c9a..0705bcb2903 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -34,7 +34,7 @@ public: void createForShard( const Cluster::ShardInfo & shard_info, - const String & query, const ASTPtr & query_ast, + const ASTPtr & query_ast, ContextPtr context, const ThrottlerPtr & throttler, const SelectQueryInfo & query_info, std::vector & plans, diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index ac5044ba781..5284756a4ff 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -91,7 +91,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, void executeQuery( QueryPlan & query_plan, IStreamFactory & stream_factory, Poco::Logger * log, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info) + const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, + const ExpressionActionsPtr & sharding_key_expr, + const std::string & sharding_key_column_name, + const ClusterPtr & not_optimized_cluster) { assert(log); @@ -104,9 +107,7 @@ void executeQuery( Pipes remote_pipes; Pipes delayed_pipes; - const std::string query = queryToString(query_ast); - - auto new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log); + auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log); new_context->getClientInfo().distributed_depth += 1; @@ -127,9 +128,28 @@ void executeQuery( else throttler = 
user_level_throttler; - for (const auto & shard_info : query_info.cluster->getShardsInfo()) + size_t shards = query_info.getCluster()->getShardCount(); + for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) { - stream_factory.createForShard(shard_info, query, query_ast, + ASTPtr query_ast_for_shard; + if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + { + query_ast_for_shard = query_ast->clone(); + + OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ + sharding_key_expr, + sharding_key_column_name, + shard_info, + not_optimized_cluster->getSlotToShard(), + }; + OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); + visitor.visit(query_ast_for_shard); + } + else + query_ast_for_shard = query_ast; + + stream_factory.createForShard(shard_info, + query_ast_for_shard, new_context, throttler, query_info, plans, remote_pipes, delayed_pipes, log); } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 59e4d9e7f98..46525335803 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -8,11 +8,15 @@ namespace DB struct Settings; class Cluster; +using ClusterPtr = std::shared_ptr; struct SelectQueryInfo; class Pipe; class QueryPlan; +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; + namespace ClusterProxy { @@ -35,7 +39,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, void executeQuery( QueryPlan & query_plan, IStreamFactory & stream_factory, Poco::Logger * log, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info); + const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, + const ExpressionActionsPtr & sharding_key_expr, + const std::string & sharding_key_column_name, + const ClusterPtr & not_optimized_cluster); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5ea52556d48..187edf8843f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -225,7 +226,6 @@ private: void cleanThread() { setThreadName("SessionCleaner"); - std::unique_lock lock{mutex}; while (true) @@ -338,6 +338,9 @@ struct ContextSharedPart mutable std::optional embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. mutable std::optional external_dictionaries_loader; mutable std::optional external_models_loader; + ConfigurationPtr external_models_config; + ext::scope_guard models_repository_guard; + String default_profile_name; /// Default profile name used for default values. 
String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying @@ -445,6 +448,7 @@ struct ContextSharedPart system_logs.reset(); embedded_dictionaries.reset(); external_dictionaries_loader.reset(); + models_repository_guard.reset(); external_models_loader.reset(); buffer_flush_schedule_pool.reset(); schedule_pool.reset(); @@ -456,7 +460,6 @@ struct ContextSharedPart trace_collector.reset(); /// Stop zookeeper connection zookeeper.reset(); - } bool hasTraceCollector() const @@ -1353,11 +1356,29 @@ const ExternalModelsLoader & Context::getExternalModelsLoader() const ExternalModelsLoader & Context::getExternalModelsLoader() { std::lock_guard lock(shared->external_models_mutex); + return getExternalModelsLoaderUnlocked(); +} + +ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked() +{ if (!shared->external_models_loader) shared->external_models_loader.emplace(getGlobalContext()); return *shared->external_models_loader; } +void Context::setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name) +{ + std::lock_guard lock(shared->external_models_mutex); + + if (shared->external_models_config && isSameConfigurationWithMultipleKeys(*config, *shared->external_models_config, "", config_name)) + return; + + shared->external_models_config = config; + shared->models_repository_guard.reset(); + shared->models_repository_guard = getExternalModelsLoaderUnlocked().addConfigRepository( + std::make_unique(*config, config_name)); +} + EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_error) const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index eda72c1f2d2..b5912738833 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -499,8 +499,11 @@ public: EmbeddedDictionaries & getEmbeddedDictionaries(); ExternalDictionariesLoader & getExternalDictionariesLoader(); ExternalModelsLoader & getExternalModelsLoader(); + ExternalModelsLoader & getExternalModelsLoaderUnlocked(); void tryCreateEmbeddedDictionaries() const; + void setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name = "models_config"); + /// I/O formats. BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const; diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp new file mode 100644 index 00000000000..4d1c0526910 --- /dev/null +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +using namespace DB; + +Field executeFunctionOnField( + const Field & field, const std::string & name, + const ExpressionActionsPtr & expr, + const std::string & sharding_key_column_name) +{ + DataTypePtr type = applyVisitor(FieldToDataType{}, field); + + ColumnWithTypeAndName column; + column.column = type->createColumnConst(1, field); + column.name = name; + column.type = type; + + Block block{column}; + size_t num_rows = 1; + expr->execute(block, num_rows); + + ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name); + return (*ret.column)[0]; +} + +/// Return true if shard may contain such value (or it is unknown), otherwise false.
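+/// Illustrative example: with slots = [0, 1] and a sharding key value of 3, slots[3 % 2] + 1 == 2, so only shard number 2 may contain rows with that value.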
+bool shardContains( + const Field & sharding_column_value, + const std::string & sharding_column_name, + const ExpressionActionsPtr & expr, + const std::string & sharding_key_column_name, + const Cluster::ShardInfo & shard_info, + const Cluster::SlotToShard & slots) +{ + /// NULL is not allowed in sharding key, + /// so it should be safe to assume that shard cannot contain it. + if (sharding_column_value.isNull()) + return false; + + Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name); + UInt64 value = sharding_value.get(); + const auto shard_num = slots[value % slots.size()] + 1; + return shard_info.shard_num == shard_num; +} + +} + +namespace DB +{ + +bool OptimizeShardingKeyRewriteInMatcher::needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/) +{ + return true; +} + +void OptimizeShardingKeyRewriteInMatcher::visit(ASTPtr & node, Data & data) +{ + if (auto * function = node->as()) + visit(*function, data); +} + +void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & data) +{ + if (function.name != "in") + return; + + auto * left = function.arguments->children.front().get(); + auto * right = function.arguments->children.back().get(); + auto * identifier = left->as(); + if (!identifier) + return; + + const auto & expr = data.sharding_key_expr; + const auto & sharding_key_column_name = data.sharding_key_column_name; + + if (!expr->getRequiredColumnsWithTypes().contains(identifier->name())) + return; + + /// NOTE: we do not need to worry about an empty tuple here, + /// since after optimize_skip_unused_shards + /// at least one element should match each shard. + if (auto * tuple_func = right->as(); tuple_func && tuple_func->name == "tuple") + { + auto * tuple_elements = tuple_func->children.front()->as(); + std::erase_if(tuple_elements->children, [&](auto & child) + { + auto * literal = child->template as(); + return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots); + }); + } + else if (auto * tuple_literal = right->as(); + tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple) + { + auto & tuple = tuple_literal->value.get(); + std::erase_if(tuple, [&](auto & child) + { + return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots); + }); + } +} + +} diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h new file mode 100644 index 00000000000..3087fb844ed --- /dev/null +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; + +class ASTFunction; + +/// Rewrite `sharding_key IN (...)` for specific shard, +/// so that it will contain only values that belong to this specific shard.
+/// +/// See also: +/// - evaluateExpressionOverConstantCondition() +/// - StorageDistributed::createSelector() +/// - createBlockSelector() +struct OptimizeShardingKeyRewriteInMatcher +{ + /// Cluster::SlotToShard + using SlotToShard = std::vector; + + struct Data + { + const ExpressionActionsPtr & sharding_key_expr; + const std::string & sharding_key_column_name; + const Cluster::ShardInfo & shard_info; + const Cluster::SlotToShard & slots; + }; + + static bool needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/); + static void visit(ASTPtr & node, Data & data); + static void visit(ASTFunction & function, Data & data); +}; + +using OptimizeShardingKeyRewriteInVisitor = InDepthNodeVisitor; + +} diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 70f191c579d..354f9d10099 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -293,13 +293,11 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, else { ASTFunction * func = elem->as(); + + /// Never remove untuple. Its result column may be in required columns. + /// It is not easy to analyze untuple here, because types were not calculated yet. if (func && func->name == "untuple") - for (const auto & col : required_result_columns) - if (col.rfind("_ut_", 0) == 0) - { - new_elements.push_back(elem); - break; - } + new_elements.push_back(elem); } } diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index a8550df633b..90998077a5a 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -118,6 +118,7 @@ SRCS( OpenTelemetrySpanLog.cpp OptimizeIfChains.cpp OptimizeIfWithConstantConditionVisitor.cpp + OptimizeShardingKeyRewriteInVisitor.cpp PartLog.cpp PredicateExpressionsOptimizer.cpp PredicateRewriteVisitor.cpp diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index adeb829b4c3..132a8b96c23 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -21,9 +22,9 @@ private: void run() override {} }; public: - KeeperTCPHandlerFactory(IServer & server_) + KeeperTCPHandlerFactory(IServer & server_, bool secure) : server(server_) - , log(&Poco::Logger::get("KeeperTCPHandlerFactory")) + , log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory")) { } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7f33bba14fd..41bcb93b2c8 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1326,11 +1326,12 @@ String KeyCondition::toString() const * The set of all possible tuples can be considered as an n-dimensional space, where n is the size of the tuple. * A range of tuples specifies some subset of this space. * - * Hyperrectangles (you can also find the term "rail") - * will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges. - * In this case, the one-dimensional range can be: a period, a segment, an interval, a half-interval, unlimited on the left, unlimited on the right ... + * Hyperrectangles will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges. + * In this case, the one-dimensional range can be: + * a point, a segment, an open interval, a half-open interval; + * unlimited on the left, unlimited on the right ...
* - * The range of tuples can always be represented as a combination of hyperrectangles. + * The range of tuples can always be represented as a combination (union) of hyperrectangles. * For example, the range [ x1 y1 .. x2 y2 ] given x1 != x2 is equal to the union of the following three hyperrectangles: * [x1] x [y1 .. +inf) * (x1 .. x2) x (-inf .. +inf) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index e7578027598..53ee063486b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -299,6 +299,10 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx auto column = result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + + if (column->onlyNull()) + return false; + const auto * col_uint8 = typeid_cast(column.get()); const NullMap * null_map = nullptr; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index e72039f7172..d373c004d10 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -486,9 +486,13 @@ void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) ConstantFilterDescription const_description(*new_filter); if (const_description.always_true) + { setFilterConstTrue(); + } else if (const_description.always_false) + { clear(); + } else { FilterDescription filter_description(*new_filter); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index fea9a7bad68..b4ac07c612a 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -119,9 +119,13 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query - /// For optimize_skip_unused_shards. - /// Can be modified in getQueryProcessingStage() + /// Cluster for the query. ClusterPtr cluster; + /// Optimized cluster for the query. + /// In case of optimize_skip_unused_shards it may differ from the original cluster. + /// + /// Configured in StorageDistributed::getQueryProcessingStage() + ClusterPtr optimized_cluster; TreeRewriterResultPtr syntax_analyzer_result; @@ -134,6 +138,8 @@ struct SelectQueryInfo /// Prepared sets are used for indices by storage engine. /// Example: x IN (1, 2, 3) PreparedSets sets; + + ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; } }; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 52c8c4f79e0..e42e53d3f1b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -478,7 +478,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); cluster = optimized_cluster; - query_info.cluster = cluster; + query_info.optimized_cluster = cluster; } else { @@ -558,7 +558,7 @@ void StorageDistributed::read( InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); /// Return directly (with correct header) if no shard to query.
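/// (query_info.getCluster() prefers optimized_cluster when optimize_skip_unused_shards built one, and falls back to the original cluster otherwise.)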
- if (query_info.cluster->getShardsInfo().empty()) + if (query_info.getCluster()->getShardsInfo().empty()) { Pipe pipe(std::make_shared(header)); auto read_from_pipe = std::make_unique(std::move(pipe)); @@ -586,7 +586,9 @@ void StorageDistributed::read( local_context->getExternalTables()); ClusterProxy::executeQuery(query_plan, select_stream_factory, log, - modified_query_ast, local_context, query_info); + modified_query_ast, local_context, query_info, + sharding_key_expr, sharding_key_column_name, + getCluster()); /// This is a bug, it is possible only when there are no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) @@ -753,7 +755,7 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co void StorageDistributed::startup() { - if (remote_database.empty() && !remote_table_function_ptr) + if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication()) LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly."); if (!storage_policy) @@ -952,7 +954,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster( throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); } - return cluster; + return {}; } IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 041994103f9..6ad7b0bce6e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -159,8 +159,7 @@ bool StorageMerge::isRemote() const bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const { /// It's beneficial if it is true for at least one table. - StorageListWithLocks selected_tables = getSelectedTables( - query_context->getCurrentQueryId(), query_context->getSettingsRef()); + StorageListWithLocks selected_tables = getSelectedTables(query_context); size_t i = 0; for (const auto & table : selected_tables) @@ -250,8 +249,7 @@ Pipe StorageMerge::read( /** First we make a list of selected tables to find out its size. * This is necessary to correctly pass the recommended number of threads to each table. */ - StorageListWithLocks selected_tables - = getSelectedTables(query_info, has_table_virtual_column, local_context->getCurrentQueryId(), local_context->getSettingsRef()); + StorageListWithLocks selected_tables = getSelectedTables(local_context, query_info.query, has_table_virtual_column); if (selected_tables.empty()) /// FIXME: do we support sampling in this case?
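For illustration, a minimal SQL sketch of the behaviour the refactored getSelectedTables() preserves (table names mirror the new stateless test added below):

```sql
-- Only tables whose name matches the condition on the _table virtual
-- column are kept in the list of selected tables before reading;
-- the others are removed via filterBlockWithQuery().
SELECT count(), sum(n)
FROM merge(currentDatabase(), 'src_table')
WHERE _table = 'src_table_1';
```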
@@ -427,34 +425,20 @@ Pipe StorageMerge::createSources( return pipe; } - -StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id, const Settings & settings) const -{ - StorageListWithLocks selected_tables; - auto iterator = getDatabaseIterator(getContext()); - - while (iterator->isValid()) - { - const auto & table = iterator->table(); - if (table && table.get() != this) - selected_tables.emplace_back( - table, table->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name()); - - iterator->next(); - } - - return selected_tables; -} - - StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( - const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const + ContextPtr query_context, + const ASTPtr & query /* = nullptr */, + bool filter_by_virtual_column /* = false */) const { - const ASTPtr & query = query_info.query; + assert(!filter_by_virtual_column || query); + + const Settings & settings = query_context->getSettingsRef(); StorageListWithLocks selected_tables; DatabaseTablesIteratorPtr iterator = getDatabaseIterator(getContext()); - auto virtual_column = ColumnString::create(); + MutableColumnPtr table_name_virtual_column; + if (filter_by_virtual_column) + table_name_virtual_column = ColumnString::create(); while (iterator->isValid()) { @@ -467,18 +451,20 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( if (storage.get() != this) { - selected_tables.emplace_back( - storage, storage->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name()); - virtual_column->insert(iterator->name()); + auto table_lock = storage->lockForShare(query_context->getCurrentQueryId(), settings.lock_acquire_timeout); + selected_tables.emplace_back(storage, std::move(table_lock), iterator->name()); + if (filter_by_virtual_column) + table_name_virtual_column->insert(iterator->name()); } iterator->next(); } - if (has_virtual_column) + if (filter_by_virtual_column) { - Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared(), "_table")}; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, getContext()); + /// Filter names of selected tables if there is a condition on "_table" virtual column in WHERE clause + Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(table_name_virtual_column), std::make_shared(), "_table")}; + VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, query_context); auto values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_table"); /// Remove unused tables from the list @@ -488,7 +474,6 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( return selected_tables; } - DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(ContextPtr local_context) const { try diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index a1d5c7fd02d..ff016952686 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -55,10 +55,8 @@ private: using StorageWithLockAndName = std::tuple; using StorageListWithLocks = std::list; - StorageListWithLocks getSelectedTables(const String & query_id, const Settings & settings) const; - StorageMerge::StorageListWithLocks getSelectedTables( - const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const; + ContextPtr query_context, const ASTPtr & query = nullptr, bool 
filter_by_virtual_column = false) const; template StoragePtr getFirstTable(F && predicate) const; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index c479c6d45b3..a6a68f598c7 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -191,10 +191,15 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex ConstantFilterDescription constant_filter(*filter_column); if (constant_filter.always_true) + { return; + } if (constant_filter.always_false) + { block = block.cloneEmpty(); + return; + } FilterDescription filter(*filter_column); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 93533f1b53d..3d85f494676 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -54,6 +54,26 @@ def run_and_check(args, env=None, shell=False, stdout=subprocess.PIPE, stderr=su raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8'))) +def retry_exception(num, delay, func, exception=Exception, *args, **kwargs): + """ + Retry `func()` up to `num` times if it throws. + + :param func: func to run + :param num: number of retries + :param delay: delay in seconds between attempts + + :throws StopIteration + """ + i = 0 + while i <= num: + try: + func(*args, **kwargs) + time.sleep(delay) + except exception: # pylint: disable=broad-except + i += 1 + continue + return + raise StopIteration('Function did not finish successfully') + def subprocess_check_call(args): # Uncomment for debugging # print('run:', ' ' . join(args)) @@ -632,16 +652,6 @@ class ClickHouseCluster: if self.is_up: return - # Just in case kill unstopped containers from previous launch - try: - print("Trying to kill unstopped containers...") - - if not subprocess_call(['docker-compose', 'kill']): - subprocess_call(['docker-compose', 'down', '--volumes']) - print("Unstopped containers killed") - except: - pass - try: if destroy_dirs and p.exists(self.instances_dir): print(("Removing instances dir %s", self.instances_dir)) @@ -651,9 +661,24 @@ class ClickHouseCluster: print(('Setup directory for instance: {} destroy_dirs: {}'.format(instance.name, destroy_dirs))) instance.create_dir(destroy_dir=destroy_dirs) + # In case of multiple clusters we should not stop compose services.
+ if destroy_dirs: + # Just in case kill unstopped containers from previous launch + try: + print("Trying to kill unstopped containers...") + subprocess_call(['docker-compose', 'kill']) + subprocess_call(self.base_cmd + ['down', '--volumes', '--remove-orphans']) + print("Unstopped containers killed") + except: + pass + + clickhouse_pull_cmd = self.base_cmd + ['pull'] + print(f"Pulling images for {self.base_cmd}") + retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd) + self.docker_client = docker.from_env(version=self.docker_api_version) - common_opts = ['up', '-d', '--force-recreate'] + common_opts = ['up', '-d'] if self.with_zookeeper and self.base_zookeeper_cmd: print('Setup ZooKeeper') @@ -735,7 +760,7 @@ class ClickHouseCluster: if self.with_redis and self.base_redis_cmd: print('Setup Redis') - subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_redis_cmd + ['up', '-d']) time.sleep(10) if self.with_minio and self.base_minio_cmd: @@ -769,7 +794,7 @@ class ClickHouseCluster: os.environ.pop('SSL_CERT_FILE') if self.with_cassandra and self.base_cassandra_cmd: - subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_cassandra_cmd + ['up', '-d']) self.wait_cassandra_to_start() clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate'] diff --git a/tests/integration/test_catboost_model_config_reload/__init__.py b/tests/integration/test_catboost_model_config_reload/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_catboost_model_config_reload/config/catboost_lib.xml b/tests/integration/test_catboost_model_config_reload/config/catboost_lib.xml new file mode 100644 index 00000000000..745be7cebe6 --- /dev/null +++ b/tests/integration/test_catboost_model_config_reload/config/catboost_lib.xml @@ -0,0 +1,3 @@ + + /etc/clickhouse-server/model/libcatboostmodel.so + diff --git a/tests/integration/test_catboost_model_config_reload/config/models_config.xml b/tests/integration/test_catboost_model_config_reload/config/models_config.xml new file mode 100644 index 00000000000..7e62283a83c --- /dev/null +++ b/tests/integration/test_catboost_model_config_reload/config/models_config.xml @@ -0,0 +1,2 @@ + + diff --git a/tests/integration/test_catboost_model_config_reload/model/libcatboostmodel.so b/tests/integration/test_catboost_model_config_reload/model/libcatboostmodel.so new file mode 100755 index 00000000000..388d9f887b4 Binary files /dev/null and b/tests/integration/test_catboost_model_config_reload/model/libcatboostmodel.so differ diff --git a/tests/integration/test_catboost_model_config_reload/model/model.bin b/tests/integration/test_catboost_model_config_reload/model/model.bin new file mode 100644 index 00000000000..118e099d176 Binary files /dev/null and b/tests/integration/test_catboost_model_config_reload/model/model.bin differ diff --git a/tests/integration/test_catboost_model_config_reload/model/model_config.xml b/tests/integration/test_catboost_model_config_reload/model/model_config.xml new file mode 100644 index 00000000000..af9778097fa --- /dev/null +++ b/tests/integration/test_catboost_model_config_reload/model/model_config.xml @@ -0,0 +1,8 @@ + + + catboost + model1 + /etc/clickhouse-server/model/model.bin + 0 + + diff --git a/tests/integration/test_catboost_model_config_reload/model/model_config2.xml b/tests/integration/test_catboost_model_config_reload/model/model_config2.xml new file mode 
100644 index 00000000000..b81120ec900 --- /dev/null +++ b/tests/integration/test_catboost_model_config_reload/model/model_config2.xml @@ -0,0 +1,8 @@ + + + catboost + model2 + /etc/clickhouse-server/model/model.bin + 0 + + diff --git a/tests/integration/test_catboost_model_config_reload/test.py b/tests/integration/test_catboost_model_config_reload/test.py new file mode 100644 index 00000000000..34da1cda2d5 --- /dev/null +++ b/tests/integration/test_catboost_model_config_reload/test.py @@ -0,0 +1,58 @@ +import os +import sys +import time + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', stay_alive=True, main_configs=['config/models_config.xml', 'config/catboost_lib.xml']) + + +def copy_file_to_container(local_path, dist_path, container_id): + os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path)) + + +config = ''' + /etc/clickhouse-server/model/{model_config} +''' + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + copy_file_to_container(os.path.join(SCRIPT_DIR, 'model/.'), '/etc/clickhouse-server/model', node.docker_id) + node.restart_clickhouse() + + yield cluster + + finally: + cluster.shutdown() + + +def change_config(model_config): + node.replace_config("/etc/clickhouse-server/config.d/models_config.xml", config.format(model_config=model_config)) + node.query("SYSTEM RELOAD CONFIG;") + + +def test(started_cluster): + # Set config with the path to the first model. + change_config("model_config.xml") + + node.query("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);") + + # Change path to the second model in config. + change_config("model_config2.xml") + + # Check that the new model is loaded. + node.query("SELECT modelEvaluate('model2', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);") + + # Check that the old model was unloaded. 
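+ # (query_and_get_error asserts that the query fails: 'model1' is no longer present in the config.)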
+ node.query_and_get_error("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);") + diff --git a/tests/integration/test_keeper_internal_secure/__init__.py b/tests/integration/test_keeper_internal_secure/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper1.xml b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper1.xml new file mode 100644 index 00000000000..ecbd50c72a6 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper1.xml @@ -0,0 +1,42 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + true + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper2.xml b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper2.xml new file mode 100644 index 00000000000..53129ae0a75 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper2.xml @@ -0,0 +1,42 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + true + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper3.xml b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper3.xml new file mode 100644 index 00000000000..4c685764ec0 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/enable_secure_keeper3.xml @@ -0,0 +1,42 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + true + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_internal_secure/configs/rootCA.pem b/tests/integration/test_keeper_internal_secure/configs/rootCA.pem new file mode 100644 index 00000000000..ec16533d98a --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/rootCA.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDazCCAlOgAwIBAgIUUiyhAav08YhTLfUIXLN/0Ln09n4wDQYJKoZIhvcNAQEL +BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yMTA0MTIxMTQ1MjBaFw0yMTA1 +MTIxMTQ1MjBaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw +HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggEiMA0GCSqGSIb3DQEB +AQUAA4IBDwAwggEKAoIBAQDK0Ww4voPlkePBPS2MsEi7e1ePS+CDxTdDuOwWWEA7 +JiOyqIGqdyL6AE2EqjL3sSdVFVxytpGQWDuM6JHXdb01AnMngBuql9Jkiln7i267 +v54HtMWdm8o3rik/b/mB+kkn/sP715tI49Ybh/RobtvtK16ZgHr1ombkq6rXiom2 +8GmSmpYFwZtZsXtm2JwbZVayupQpWwdu3KrTXKBtVyKVvvWdgkf47DWYtWDS3vqE +cShM1H97G4DvI+4RX1WtQevQ0yCx1aFTg4xMHFkpUxlP8iW6mQaQPqy9rnI57e3L +RHc2I/B56xa43R3GmQ2S7bE4hvm1SrZDtVgrZLf4nvwNAgMBAAGjUzBRMB0GA1Ud +DgQWBBQ4+o0x1FzK7nRbcnm2pNLwaywCdzAfBgNVHSMEGDAWgBQ4+o0x1FzK7nRb +cnm2pNLwaywCdzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQDE +YmM8MH6RKcaqMqCBefWLj0LTcZ/Wm4G/eCFC51PkAIsf7thnzViemBHRXUSF8wzc +1MBPD6II6OB1F0i7ntGjtlhnL2WcPYbo2Np59p7fo9SMbYwF49OZ40twsuKeeoAp 
+pfow+y/EBZqa99MY2q6FU6FDA3Rpv0Sdk+/5PHdsSP6cgeMszFBUS0tCQEvEl83n +FJUb0vjEX4x3J64XO/0DKXyCxFyF77OwHG2ZV5BeCpIhGXu+d/e221LJkGI2orKR +kgsaUwrkS8HQt3Hd0gYpLI1Opx/JlRpB0VLYLzRGj7kDpbAcTj3SMEUp/FAZmlXR +Iiebt73eE3rOWVFgyY9f +-----END CERTIFICATE----- diff --git a/tests/integration/test_keeper_internal_secure/configs/server.crt b/tests/integration/test_keeper_internal_secure/configs/server.crt new file mode 100644 index 00000000000..dfa32da5444 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDETCCAfkCFHL+gKBQnU0P73/nrFrGaVPauTPmMA0GCSqGSIb3DQEBCwUAMEUx +CzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQwHhcNMjEwNDEyMTE0NzI5WhcNMjEwNTEyMTE0 +NzI5WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UE +CgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEA1iPeYn1Vy4QnQi6uNVqQnFLr0u3qdrMjGEBNAOuGmtIdhIn8 +rMCzaehNr3y2YTMRbZAqmv28P/wOXpzR1uQaFlQzTOjmsn/HOZ9JX2hv5sBUv7SU +UiPJS7UtptKDPbLv3N/v1dOXbY+vVyzo8U1Q9OS1J5yhYW6KtxP++hfSrOsFu669 +d1pqWFWaNBsmf0zF+ETvi6lywhyTFA1/PazcStP5GntcDL7eDvGq+DDsRC40oRpy +S4xRQRSteCTtGGmWpx+Jmt+90wFnLgruUbWT0veCoLxLvz0tJUk3ueUVnMkrxBQG +Fz+IWm+SQppNU5LlAcBcu9wJfo3h34BXp0NFNQIDAQABMA0GCSqGSIb3DQEBCwUA +A4IBAQCUnvQsv+GsPwGnIWqH9iiFVhgDx5QbSTW94Fyqk8dcIJBzWAiCshmLBWPJ +pfy4y2nxJbzovFsd9DA49pxqqILeLjue99yma2DVKeo+XDLDN3OX5faIMTBd7AnL +0MKqW7gUSLRUZrNOvFciAY8xRezgBQQBo4mcmmMbAbk5wKndGY6ZZOcY+JwXlqGB +5hyi6ishO8ciiZi3GMFNWWk9ViSfo27IqjKdSkQq1pr3FULvepd6SkdX+NvfZTAH +rG+CSoFGiJcOBbhDkvpY32cAJEnJOA1vHpFxfnGP8/1haeVZHqSwH1cySD78HVtF +fBs000wGHzBYWNI2KkwjNtYf06P4 +-----END CERTIFICATE----- diff --git a/tests/integration/test_keeper_internal_secure/configs/server.key b/tests/integration/test_keeper_internal_secure/configs/server.key new file mode 100644 index 00000000000..7e57c8b6b34 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/server.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEA1iPeYn1Vy4QnQi6uNVqQnFLr0u3qdrMjGEBNAOuGmtIdhIn8 +rMCzaehNr3y2YTMRbZAqmv28P/wOXpzR1uQaFlQzTOjmsn/HOZ9JX2hv5sBUv7SU +UiPJS7UtptKDPbLv3N/v1dOXbY+vVyzo8U1Q9OS1J5yhYW6KtxP++hfSrOsFu669 +d1pqWFWaNBsmf0zF+ETvi6lywhyTFA1/PazcStP5GntcDL7eDvGq+DDsRC40oRpy +S4xRQRSteCTtGGmWpx+Jmt+90wFnLgruUbWT0veCoLxLvz0tJUk3ueUVnMkrxBQG +Fz+IWm+SQppNU5LlAcBcu9wJfo3h34BXp0NFNQIDAQABAoIBAHYDso2o8V2F6XTp +8QxqawQcFudaQztDonW9CjMVmks8vRPMUDqMwNP/OMEcBA8xa8tsBm8Ao3zH1suB +tYuujkn8AYHDYVDCZvN0u6UfE3yiRpKYXJ2gJ1HX+d7UaYvZT6P0rmKzh+LTqxhq +Ib7Kk3FDkirQgYgGueAH3x/JfUvaAGvFrq+HvvlhHOs7M7iFU4nJA8jNfBolpTnG +v5MMI+f8/GHGreVICJUoclE+4V/4LDHUlrc3l1kQk0keeD6ECw/pl48TNL6ncXKu +baez1rfKbMPjhLUy2q5UZa93oW+olchEOXs1nUNKUhIOOr0f0YweYhUHNTineVM9 +yTecMIkCgYEA7CFQMyeLVeBA6C9AHBe8Zf/k64cMPyr0lUz6548ulil580PNPbvW +kd2vIKfUMgCO5lMA47ArL4bXZ7cjTvJmPYE1Yv8z+F0Tk03fnTrudHOSBEiGXAu3 +MPTxCDU7Se5Dwj0Fq81aFRtCHl8Rrss+WiBD8eRoxb/vwXKFc6VUAWMCgYEA6CjZ +XrZz11lySBhjkyVXcdLj89hDZ+bPxA7b3VB7TfCxsn5xVck7U3TFkg5Z9XwEQ7Ob +XFAPuwT9GKm7QPp6L8T2RltoJ3ys40UH1RtcNLz2aIo/xSP7lopPdAfWHef5r4y9 +kRw+Gh4NP/l5wefXsRz/D0jY3+t+QnwnhuCKbocCgYEAiR6bPOlkvzyXVH1DxEyA +Sdb8b00f7nqaRyzJsrfxvJ9fQsWHpKa0ZkYOUW9ECLlMQjHHHXEK0vGBmqe9qDWY +63RhtRgvbLVYDb018k7rc9I846Hd7AudmJ9UbIjE4hyrWlsnNOntur32ej6IvTEn +Bx0fd5NEyDi6GGLRXiOOkbMCgYAressLE/yqDlR68CZl/o5cAPU0TAKDyRSMUYQX +9OTC+hstpMSxHlkADlSaQBnVAf8CdvbX2R65FfwYzGEHkGGl5KuDDcd57b2rathG +rzMbpXA4r/u1fkG2Nf0fbABL5ZA7so4mSTXQSmSM4LpO+I7K2vVh9XC4rzAcX4g/ +mHoUrQKBgBf3rxp5h9P3HWoZYjzBDo2FqXUjKLLjE9ed5e/VqecqfHIkmueuNHlN 
+xifHr7lpsYu6IXkTnlK14pvLoPuwP59dCIOUYwAFz8RlH4MSUGNhYeGA8cqRrhmJ +tYk3OKExuN/+O12kUPVTy6BMH1hBdRJP+7y7lapWsRhZt18y+8Za +-----END RSA PRIVATE KEY----- diff --git a/tests/integration/test_keeper_internal_secure/configs/ssl_conf.xml b/tests/integration/test_keeper_internal_secure/configs/ssl_conf.xml new file mode 100644 index 00000000000..babc7cf0f18 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/configs/ssl_conf.xml @@ -0,0 +1,15 @@ + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + /etc/clickhouse-server/config.d/rootCA.pem + true + none + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_keeper_internal_secure/test.py b/tests/integration/test_keeper_internal_secure/test.py new file mode 100644 index 00000000000..d9fbca624e1 --- /dev/null +++ b/tests/integration/test_keeper_internal_secure/test.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_secure_keeper1.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem']) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_secure_keeper2.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem']) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_secure_keeper3.xml', 'configs/ssl_conf.xml', 'configs/server.crt', 'configs/server.key', 'configs/rootCA.pem']) + +from kazoo.client import KazooClient, KazooState + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + def reset_listener(state): + nonlocal _fake_zk_instance + print("Fake zk callback called for state", state) + if state != KazooState.CONNECTED: + _fake_zk_instance._reset() + + _fake_zk_instance.add_listener(reset_listener) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_secure_raft_works(started_cluster): + try: + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + node3_zk = get_fake_zk("node3") + + node1_zk.create("/test_node", b"somedata1") + node2_zk.sync("/test_node") + node3_zk.sync("/test_node") + + assert node1_zk.exists("/test_node") is not None + assert node2_zk.exists("/test_node") is not None + assert node3_zk.exists("/test_node") is not None + finally: + try: + for zk_conn in [node1_zk, node2_zk, node3_zk]: + zk_conn.stop() + zk_conn.close() + except: + pass diff --git a/tests/integration/test_keeper_secure_client/__init__.py b/tests/integration/test_keeper_secure_client/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_secure_client/configs/dhparam.pem b/tests/integration/test_keeper_secure_client/configs/dhparam.pem new file mode 100644 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 
+Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb +9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_keeper_secure_client/configs/enable_secure_keeper.xml b/tests/integration/test_keeper_secure_client/configs/enable_secure_keeper.xml new file mode 100644 index 00000000000..af815f4a3bc --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/enable_secure_keeper.xml @@ -0,0 +1,24 @@ + + + + 10181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 10000 + 30000 + trace + false + + + + + 1 + localhost + 44444 + + + + diff --git a/tests/integration/test_keeper_secure_client/configs/server.crt b/tests/integration/test_keeper_secure_client/configs/server.crt new file mode 100644 index 00000000000..7ade2d96273 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIC/TCCAeWgAwIBAgIJANjx1QSR77HBMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAgFw0xODA3MzAxODE2MDhaGA8yMjkyMDUxNDE4MTYwOFow +FDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAs9uSo6lJG8o8pw0fbVGVu0tPOljSWcVSXH9uiJBwlZLQnhN4SFSFohfI +4K8U1tBDTnxPLUo/V1K9yzoLiRDGMkwVj6+4+hE2udS2ePTQv5oaMeJ9wrs+5c9T +4pOtlq3pLAdm04ZMB1nbrEysceVudHRkQbGHzHp6VG29Fw7Ga6YpqyHQihRmEkTU +7UCYNA+Vk7aDPdMS/khweyTpXYZimaK9f0ECU3/VOeG3fH6Sp2X6FN4tUj/aFXEj +sRmU5G2TlYiSIUMF2JPdhSihfk1hJVALrHPTU38SOL+GyyBRWdNcrIwVwbpvsvPg +pryMSNxnpr0AK0dFhjwnupIv5hJIOQIDAQABo1AwTjAdBgNVHQ4EFgQUjPLb3uYC +kcamyZHK4/EV8jAP0wQwHwYDVR0jBBgwFoAUjPLb3uYCkcamyZHK4/EV8jAP0wQw +DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAM/ocuDvfPus/KpMVD51j +4IdlU8R0vmnYLQ+ygzOAo7+hUWP5j0yvq4ILWNmQX6HNvUggCgFv9bjwDFhb/5Vr +85ieWfTd9+LTjrOzTw4avdGwpX9G+6jJJSSq15tw5ElOIFb/qNA9O4dBiu8vn03C +L/zRSXrARhSqTW5w/tZkUcSTT+M5h28+Lgn9ysx4Ff5vi44LJ1NnrbJbEAIYsAAD ++UA+4MBFKx1r6hHINULev8+lCfkpwIaeS8RL+op4fr6kQPxnULw8wT8gkuc8I4+L +P9gg/xDHB44T3ADGZ5Ib6O0DJaNiToO6rnoaaxs0KkotbvDWvRoxEytSbXKoYjYp +0g== +-----END CERTIFICATE----- diff --git a/tests/integration/test_keeper_secure_client/configs/server.key b/tests/integration/test_keeper_secure_client/configs/server.key new file mode 100644 index 00000000000..f0fb61ac443 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCz25KjqUkbyjyn +DR9tUZW7S086WNJZxVJcf26IkHCVktCeE3hIVIWiF8jgrxTW0ENOfE8tSj9XUr3L +OguJEMYyTBWPr7j6ETa51LZ49NC/mhox4n3Cuz7lz1Pik62WreksB2bThkwHWdus +TKxx5W50dGRBsYfMenpUbb0XDsZrpimrIdCKFGYSRNTtQJg0D5WTtoM90xL+SHB7 +JOldhmKZor1/QQJTf9U54bd8fpKnZfoU3i1SP9oVcSOxGZTkbZOViJIhQwXYk92F +KKF+TWElUAusc9NTfxI4v4bLIFFZ01ysjBXBum+y8+CmvIxI3GemvQArR0WGPCe6 +ki/mEkg5AgMBAAECggEATrbIBIxwDJOD2/BoUqWkDCY3dGevF8697vFuZKIiQ7PP +TX9j4vPq0DfsmDjHvAPFkTHiTQXzlroFik3LAp+uvhCCVzImmHq0IrwvZ9xtB43f +7Pkc5P6h1l3Ybo8HJ6zRIY3TuLtLxuPSuiOMTQSGRL0zq3SQ5DKuGwkz+kVjHXUN +MR2TECFwMHKQ5VLrC+7PMpsJYyOMlDAWhRfUalxC55xOXTpaN8TxNnwQ8K2ISVY5 +212Jz/a4hn4LdwxSz3Tiu95PN072K87HLWx3EdT6vW4Ge5P/A3y+smIuNAlanMnu +plHBRtpATLiTxZt/n6npyrfQVbYjSH7KWhB8hBHtaQKBgQDh9Cq1c/KtqDtE0Ccr +/r9tZNTUwBE6VP+3OJeKdEdtsfuxjOCkS1oAjgBJiSDOiWPh1DdoDeVZjPKq6pIu +Mq12OE3Doa8znfCXGbkSzEKOb2unKZMJxzrz99kXt40W5DtrqKPNb24CNqTiY8Aa +CjtcX+3weat82VRXvph6U8ltMwKBgQDLxjiQQzNoY7qvg7CwJCjf9qq8jmLK766g 
+1FHXopqS+dTxDLM8eJSRrpmxGWJvNeNc1uPhsKsKgotqAMdBUQTf7rSTbt4MyoH5 +bUcRLtr+0QTK9hDWMOOvleqNXha68vATkohWYfCueNsC60qD44o8RZAS6UNy3ENq +cM1cxqe84wKBgQDKkHutWnooJtajlTxY27O/nZKT/HA1bDgniMuKaz4R4Gr1PIez +on3YW3V0d0P7BP6PWRIm7bY79vkiMtLEKdiKUGWeyZdo3eHvhDb/3DCawtau8L2K +GZsHVp2//mS1Lfz7Qh8/L/NedqCQ+L4iWiPnZ3THjjwn3CoZ05ucpvrAMwKBgB54 +nay039MUVq44Owub3KDg+dcIU62U+cAC/9oG7qZbxYPmKkc4oL7IJSNecGHA5SbU +2268RFdl/gLz6tfRjbEOuOHzCjFPdvAdbysanpTMHLNc6FefJ+zxtgk9sJh0C4Jh +vxFrw9nTKKzfEl12gQ1SOaEaUIO0fEBGbe8ZpauRAoGAMAlGV+2/K4ebvAJKOVTa +dKAzQ+TD2SJmeR1HZmKDYddNqwtZlzg3v4ZhCk4eaUmGeC1Bdh8MDuB3QQvXz4Dr +vOIP4UVaOr+uM+7TgAgVnP4/K6IeJGzUDhX93pmpWhODfdu/oojEKVcpCojmEmS1 +KCBtmIrQLqzMpnBpLNuSY+Q= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_keeper_secure_client/configs/ssl_conf.xml b/tests/integration/test_keeper_secure_client/configs/ssl_conf.xml new file mode 100644 index 00000000000..7ca51acde22 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/ssl_conf.xml @@ -0,0 +1,26 @@ + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + true + true + sslv2,sslv3 + true + none + + RejectCertificateHandler + + + + diff --git a/tests/integration/test_keeper_secure_client/configs/use_secure_keeper.xml b/tests/integration/test_keeper_secure_client/configs/use_secure_keeper.xml new file mode 100644 index 00000000000..a0d19300022 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/configs/use_secure_keeper.xml @@ -0,0 +1,9 @@ + + + + node1 + 10181 + 1 + + + diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py new file mode 100644 index 00000000000..fe03ed8dcf8 --- /dev/null +++ b/tests/integration/test_keeper_secure_client/test.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import string +import os +import time + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_secure_keeper.xml', 'configs/ssl_conf.xml', "configs/dhparam.pem", "configs/server.crt", "configs/server.key"]) +node2 = cluster.add_instance('node2', main_configs=['configs/use_secure_keeper.xml', 'configs/ssl_conf.xml', "configs/server.crt", "configs/server.key"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_connection(started_cluster): + # just nothrow + node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 2ef71927bdf..f6026e8dd3b 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -6,6 +6,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', with_odbc_drivers=True, with_mysql=True, @@ -269,7 +270,7 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): node1.exec_in_container(["bash", "-c", "chmod a+rw /tmp"], privileged=True, user='root') node1.exec_in_container(["bash", 
"-c", "chmod a+rw {}".format(sqlite_db)], privileged=True, user='root') - node1.query("insert into table function odbc('DSN={};', '', 't3') values (200, 2, 7)".format( + node1.query("insert into table function odbc('DSN={};ReadOnly=0', '', 't3') values (200, 2, 7)".format( node1.odbc_drivers["SQLite3"]["DSN"])) assert node1.query("select dictGetUInt8('sqlite3_odbc_cached', 'Z', toUInt64(200))") == "7\n" # new value @@ -381,5 +382,126 @@ def test_odbc_postgres_date_data_type(started_cluster): expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n' result = node1.query('SELECT * FROM test_date'); assert(result == expected) + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_date") + node1.query("DROP TABLE IF EXISTS test_date") +def test_odbc_postgres_conversions(started_cluster): + conn = get_postgres_conn() + cursor = conn.cursor() + + cursor.execute( + '''CREATE TABLE IF NOT EXISTS clickhouse.test_types ( + a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, + h timestamp)''') + + node1.query(''' + INSERT INTO TABLE FUNCTION + odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')''') + + result = node1.query(''' + SELECT a, b, c, d, e, f, g, h + FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + ''') + + assert(result == '-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n') + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + + cursor.execute("""CREATE TABLE IF NOT EXISTS clickhouse.test_types (column1 Timestamp, column2 Numeric)""") + + node1.query( + ''' + CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1)) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')''') + + node1.query( + """INSERT INTO test_types + SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow'), toDecimal32(1.1, 1)""") + + expected = node1.query("SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow'), toDecimal32(1.1, 1)") + result = node1.query("SELECT * FROM test_types") + print(result) + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + assert(result == expected) + + +def test_odbc_cyrillic_with_varchar(started_cluster): + conn = get_postgres_conn() + cursor = conn.cursor() + + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_cyrillic") + cursor.execute("CREATE TABLE clickhouse.test_cyrillic (name varchar(11))") + + node1.query(''' + CREATE TABLE test_cyrillic (name String) + ENGINE = ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_cyrillic')''') + + cursor.execute("INSERT INTO clickhouse.test_cyrillic VALUES ('A-nice-word')") + cursor.execute("INSERT INTO clickhouse.test_cyrillic VALUES ('Красивенько')") + + result = node1.query(''' SELECT * FROM test_cyrillic ORDER BY name''') + assert(result == 'A-nice-word\nКрасивенько\n') + result = node1.query(''' SELECT name FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_cyrillic') ''') + assert(result == 'A-nice-word\nКрасивенько\n') + + +def test_many_connections(started_cluster): + conn = get_postgres_conn() + cursor = conn.cursor() + + cursor.execute('DROP TABLE IF EXISTS clickhouse.test_pg_table') + cursor.execute('CREATE TABLE clickhouse.test_pg_table (key integer, value integer)') + + 
+    node1.query('''
+        DROP TABLE IF EXISTS test_pg_table;
+        CREATE TABLE test_pg_table (key UInt32, value UInt32)
+        ENGINE = ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_pg_table')''')
+
+    node1.query("INSERT INTO test_pg_table SELECT number, number FROM numbers(10)")
+
+    query = "SELECT count() FROM ("
+    for i in range(24):
+        query += "SELECT key FROM {t} UNION ALL "
+    query += "SELECT key FROM {t})"
+
+    assert node1.query(query.format(t='test_pg_table')) == '250\n'
+
+
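+# The test below inserts from 5 threads x 5 iterations x 1000 rows = 25000 rows,
+# then repeats the same amount with concurrent SELECTs mixed in; the table is
+# not truncated in between, hence the final count of 2 * 25000 = 50000.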
+def test_concurrent_queries(started_cluster):
+    conn = get_postgres_conn()
+    cursor = conn.cursor()
+
+    node1.query('''
+        DROP TABLE IF EXISTS test_pg_table;
+        CREATE TABLE test_pg_table (key UInt32, value UInt32)
+        ENGINE = ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_pg_table')''')
+
+    cursor.execute('DROP TABLE IF EXISTS clickhouse.test_pg_table')
+    cursor.execute('CREATE TABLE clickhouse.test_pg_table (key integer, value integer)')
+
+    def node_insert(_):
+        for i in range(5):
+            node1.query("INSERT INTO test_pg_table SELECT number, number FROM numbers(1000)", user='default')
+
+    busy_pool = Pool(5)
+    p = busy_pool.map_async(node_insert, range(5))
+    p.wait()
+    result = node1.query("SELECT count() FROM test_pg_table", user='default')
+    print(result)
+    assert(int(result) == 5 * 5 * 1000)
+
+    def node_insert_select(_):
+        for i in range(5):
+            result = node1.query("INSERT INTO test_pg_table SELECT number, number FROM numbers(1000)", user='default')
+            result = node1.query("SELECT * FROM test_pg_table LIMIT 100", user='default')
+
+    busy_pool = Pool(5)
+    p = busy_pool.map_async(node_insert_select, range(5))
+    p.wait()
+    result = node1.query("SELECT count() FROM test_pg_table", user='default')
+    print(result)
+    assert(int(result) == 5 * 5 * 1000 * 2)
+
+    node1.query('DROP TABLE test_pg_table;')
+    cursor.execute('DROP TABLE clickhouse.test_pg_table;')
diff --git a/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.reference b/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.reference
new file mode 100644
index 00000000000..90755b06aa9
--- /dev/null
+++ b/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.reference
@@ -0,0 +1,7 @@
+30 4995
+20 4950
+15 4700
+20 495
+20 4545
+15 470
+15 4520
diff --git a/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.sql b/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.sql
new file mode 100644
index 00000000000..2a250725654
--- /dev/null
+++ b/tests/queries/0_stateless/01151_storage_merge_filter_tables_by_virtual_column.sql
@@ -0,0 +1,26 @@
+drop table if exists src_table_1;
+drop table if exists src_table_2;
+drop table if exists src_table_3;
+drop table if exists set;
+
+create table src_table_1 (n UInt64) engine=Memory as select * from numbers(10);
+create table src_table_2 (n UInt64) engine=Log as select number * 10 from numbers(10);
+create table src_table_3 (n UInt64) engine=MergeTree order by n as select number * 100 from numbers(10);
+create table set (s String) engine=Set as select arrayJoin(['src_table_1', 'src_table_2']);
+
+create temporary table tmp (s String);
+insert into tmp values ('src_table_1'), ('src_table_3');
+
+select count(), sum(n) from merge(currentDatabase(), 'src_table');
+-- FIXME #21401 select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table = 'src_table_1' or toInt8(substr(_table, 11, 1)) = 2;
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table in ('src_table_2', 'src_table_3');
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table in ('src_table_2', 'src_table_3') and n % 20 = 0;
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table in set;
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table in tmp;
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where _table in set and n % 2 = 0;
+select count(), sum(n) from merge(currentDatabase(), 'src_table') where n % 2 = 0 and _table in tmp;
+
+drop table src_table_1;
+drop table src_table_2;
+drop table src_table_3;
+drop table set;
diff --git a/tests/queries/0_stateless/01753_max_uri_size.sh b/tests/queries/0_stateless/01753_max_uri_size.sh
index 5c63d9274fd..62bc4f2c26f 100755
--- a/tests/queries/0_stateless/01753_max_uri_size.sh
+++ b/tests/queries/0_stateless/01753_max_uri_size.sh
@@ -4,8 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-# NOTE: since 'max_uri_size' doesn't affect the request itself, this test hardly depends on the default value of this setting (16Kb).
+# NOTE: since 'max_uri_size' doesn't affect the request itself, this test depends heavily on the default value of this setting (1 MiB): the URL generated below is ~1.2 MB, just over that limit.

-LONG_REQUEST=$(python3 -c "print('&max_uri_size=1'*2000, end='')") # ~30K
+python3 -c "
+print('${CLICKHOUSE_URL}', end='')
+print('&hello=world'*100000, end='')
+print('&query=SELECT+1')
+" > "${CLICKHOUSE_TMP}/url.txt"

-${CLICKHOUSE_CURL} -sSv "${CLICKHOUSE_URL}${LONG_REQUEST}&query=SELECT+1" 2>&1 | grep -Fc "HTTP/1.1 400 Bad Request"
+wget --input-file "${CLICKHOUSE_TMP}/url.txt" 2>&1 | grep -Fc "400: Bad Request"
+
+rm "${CLICKHOUSE_TMP}/url.txt"
diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference
new file mode 100644
index 00000000000..494e9ca3237
--- /dev/null
+++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference
@@ -0,0 +1,22 @@
+(0, 2)
+0 0
+0 0
+WITH CAST(\'default\', \'String\') AS id_no SELECT one.dummy, ignore(id_no) FROM system.one WHERE dummy IN (0, 2)
+WITH CAST(\'default\', \'String\') AS id_no SELECT one.dummy, ignore(id_no) FROM system.one WHERE dummy IN (0, 2)
+optimize_skip_unused_shards_rewrite_in(0, 2)
+0 0
+WITH CAST(\'default\', \'String\') AS id_02 SELECT one.dummy, ignore(id_02) FROM system.one WHERE dummy IN tuple(0)
+WITH CAST(\'default\', \'String\') AS id_02 SELECT one.dummy, ignore(id_02) FROM system.one WHERE dummy IN tuple(2)
+optimize_skip_unused_shards_rewrite_in(2,)
+WITH CAST(\'default\', \'String\') AS id_2 SELECT one.dummy, ignore(id_2) FROM system.one WHERE dummy IN tuple(2)
+optimize_skip_unused_shards_rewrite_in(0,)
+0 0
+WITH CAST(\'default\', \'String\') AS id_0 SELECT one.dummy, ignore(id_0) FROM system.one WHERE dummy IN tuple(0)
+errors
+others
+0
+0
+0
+optimize_skip_unused_shards_limit
+0
+0
diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql
new file mode 100644
index 00000000000..59e2ad75fcc
--- /dev/null
+++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql
@@ -0,0 +1,127 @@
+-- NOTE: this test cannot use 'current_database = currentDatabase()',
+-- because it is not propagated via remote queries,
+-- hence it uses 'with (select currentDatabase()) as X'
+-- (with subquery to expand it on the initiator).
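+-- (The scalar subquery is evaluated once on the initiator and shipped to the
+-- shards as a constant, while a bare currentDatabase() would be re-evaluated
+-- on each shard against that shard's own session.)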
+
+drop table if exists dist_01756;
+drop table if exists dist_01756_str;
+drop table if exists data_01756_str;
+
+-- SELECT
+--     intHash64(0) % 2,
+--     intHash64(2) % 2
+-- ┌─modulo(intHash64(0), 2)─┬─modulo(intHash64(2), 2)─┐
+-- │                       0 │                       1 │
+-- └─────────────────────────┴─────────────────────────┘
+create table dist_01756 as system.one engine=Distributed(test_cluster_two_shards, system, one, intHash64(dummy));
+
+-- separate log entry for localhost queries
+set prefer_localhost_replica=0;
+set force_optimize_skip_unused_shards=2;
+set optimize_skip_unused_shards=1;
+set optimize_skip_unused_shards_rewrite_in=0;
+set log_queries=1;
+
+--
+-- w/o optimize_skip_unused_shards_rewrite_in=1
+--
+select '(0, 2)';
+with (select currentDatabase()) as id_no select *, ignore(id_no) from dist_01756 where dummy in (0, 2);
+system flush logs;
+select query from system.query_log where
+    event_date = today() and
+    event_time > now() - interval 1 hour and
+    not is_initial_query and
+    query not like '%system.query_log%' and
+    query like concat('WITH%', currentDatabase(), '%AS id_no %') and
+    type = 'QueryFinish'
+order by query;
+
+--
+-- w/ optimize_skip_unused_shards_rewrite_in=1
+--
+
+set optimize_skip_unused_shards_rewrite_in=1;
+
+-- detailed coverage for realistic examples
+select 'optimize_skip_unused_shards_rewrite_in(0, 2)';
+with (select currentDatabase()) as id_02 select *, ignore(id_02) from dist_01756 where dummy in (0, 2);
+system flush logs;
+select query from system.query_log where
+    event_date = today() and
+    event_time > now() - interval 1 hour and
+    not is_initial_query and
+    query not like '%system.query_log%' and
+    query like concat('WITH%', currentDatabase(), '%AS id_02 %') and
+    type = 'QueryFinish'
+order by query;
+
+select 'optimize_skip_unused_shards_rewrite_in(2,)';
+with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,);
+system flush logs;
+select query from system.query_log where
+    event_date = today() and
+    event_time > now() - interval 1 hour and
+    not is_initial_query and
+    query not like '%system.query_log%' and
+    query like concat('WITH%', currentDatabase(), '%AS id_2 %') and
+    type = 'QueryFinish'
+order by query;
+
+select 'optimize_skip_unused_shards_rewrite_in(0,)';
+with (select currentDatabase()) as id_0 select *, ignore(id_0) from dist_01756 where dummy in (0,);
+system flush logs;
+select query from system.query_log where
+    event_date = today() and
+    event_time > now() - interval 1 hour and
+    not is_initial_query and
+    query not like '%system.query_log%' and
+    query like concat('WITH%', currentDatabase(), '%AS id_0 %') and
+    type = 'QueryFinish'
+order by query;
+
+--
+-- errors
+--
+select 'errors';
+
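+-- (Here serverError 507 is UNABLE_TO_SKIP_UNUSED_SHARDS, raised because
+-- force_optimize_skip_unused_shards is set; the serverError 53 further down
+-- is TYPE_MISMATCH.)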
+-- not tuple
+select * from dist_01756 where dummy in (0); -- { serverError 507 }
+-- optimize_skip_unused_shards does not support non-constants
+select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 }
+select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 }
+-- wrong type
+select * from dist_01756 where dummy in ('0'); -- { serverError 507 }
+-- NOT IN is not supported
+select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 }
+
+--
+-- others
+--
+select 'others';
+
+select * from dist_01756 where dummy not in (2, 3) and dummy in (0, 2);
+select * from dist_01756 where dummy in tuple(0, 2);
+select * from dist_01756 where dummy in tuple(0);
+select * from dist_01756 where dummy in tuple(2);
+-- Identifier is NULL
+select (2 IN (2,)), * from dist_01756 where dummy in (0, 2) format Null;
+-- Literal is NULL
+select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format Null;
+
+-- different type
+create table data_01756_str (key String) engine=Memory();
+create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key));
+select * from dist_01756_str where key in ('0', '2');
+select * from dist_01756_str where key in ('0', Null); -- { serverError 507 }
+select * from dist_01756_str where key in (0, 2); -- { serverError 53 }
+select * from dist_01756_str where key in (0, Null); -- { serverError 53 }
+
+-- optimize_skip_unused_shards_limit
+select 'optimize_skip_unused_shards_limit';
+select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 }
+select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1, force_optimize_skip_unused_shards=0;
+
+drop table dist_01756;
+drop table dist_01756_str;
+drop table data_01756_str;
diff --git a/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.reference b/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.reference
new file mode 100644
index 00000000000..75c114cdd74
--- /dev/null
+++ b/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.reference
@@ -0,0 +1,27 @@
+-- { echo }
+
+SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
+20
+SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+21
+SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+22
+-- non-zero scale
+SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+19
+SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
+20
+SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+21
+SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+22
diff --git a/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.sql b/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.sql
new file mode 100644
index 00000000000..e368f45cbda
--- /dev/null
+++ b/tests/queries/0_stateless/018002_formatDateTime_DateTime64_century.sql
@@ -0,0 +1,16 @@
+-- { echo }
+
+SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
+
+-- non-zero scale
+SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
+SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C');
\ No newline at end of file
diff --git a/tests/queries/0_stateless/018002_toDateTime64_large_values.reference b/tests/queries/0_stateless/018002_toDateTime64_large_values.reference
new file mode 100644
index 00000000000..c44c61ab93a
--- /dev/null
+++ b/tests/queries/0_stateless/018002_toDateTime64_large_values.reference
@@ -0,0 +1,10 @@
+-- { echo }
+
+SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC');
+2205-12-12 12:12:12
+SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow');
+2205-12-12 12:12:12
+SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
+2205-12-12 12:12:12.000000
+SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
+2205-12-12 12:12:12.000000
diff --git a/tests/queries/0_stateless/018002_toDateTime64_large_values.sql b/tests/queries/0_stateless/018002_toDateTime64_large_values.sql
new file mode 100644
index 00000000000..299111f43bc
--- /dev/null
+++ b/tests/queries/0_stateless/018002_toDateTime64_large_values.sql
@@ -0,0 +1,7 @@
+-- { echo }
+
+SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC');
+SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow');
+
+SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
+SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
\ No newline at end of file
diff --git a/tests/queries/0_stateless/01803_untuple_subquery.reference b/tests/queries/0_stateless/01803_untuple_subquery.reference
new file mode 100644
index 00000000000..838ff3aa952
--- /dev/null
+++ b/tests/queries/0_stateless/01803_untuple_subquery.reference
@@ -0,0 +1,2 @@
+(0.5,'92233720368547758.07',NULL) 1.00 256 \N \N
+\N
diff --git a/tests/queries/0_stateless/01803_untuple_subquery.sql b/tests/queries/0_stateless/01803_untuple_subquery.sql
new file mode 100644
index 00000000000..512b4c561af
--- /dev/null
+++ b/tests/queries/0_stateless/01803_untuple_subquery.sql
@@ -0,0 +1,3 @@
+SELECT (0.5, '92233720368547758.07', NULL), '', '1.00', untuple(('256', NULL)), NULL FROM (SELECT untuple(((NULL, untuple((('0.0000000100', (65536, NULL, (65535, 9223372036854775807), '25.7', (0.00009999999747378752, '10.25', 1048577), 65536)), '0.0000001024', '65537', NULL))), untuple((9223372036854775807, -inf, 0.5)), NULL, -9223372036854775808)), 257, 7, ('0.0001048575', (1024, NULL, (7, 3), (untuple(tuple(-NULL)), NULL, '0.0001048577', NULL), 0)), 0, (0, 0.9998999834060669, '65537'), untuple(tuple('10.25')));
+
+SELECT NULL FROM (SELECT untuple((NULL, dummy)));
diff --git a/tests/queries/0_stateless/01811_filter_by_null.reference b/tests/queries/0_stateless/01811_filter_by_null.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01811_filter_by_null.sql b/tests/queries/0_stateless/01811_filter_by_null.sql
new file mode 100644
index 00000000000..496faf428ab
--- /dev/null
+++ b/tests/queries/0_stateless/01811_filter_by_null.sql
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS test_01344;
+
+CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0;
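+-- Every '= NULL' comparison below evaluates to NULL, so no rows can match;
+-- the point of the test is that the set index on x must survive such
+-- always-NULL filters without throwing.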
+INSERT INTO test_01344 VALUES ('Hello, world');
+SELECT NULL FROM test_01344 WHERE ignore(1) = NULL;
+SELECT NULL FROM test_01344 WHERE encrypt(ignore(encrypt(NULL, '0.0001048577', lcm(2, 65537), NULL, inf, NULL), lcm(-2, 1048575)), '-0.0000000001', lcm(NULL, NULL)) = NULL;
+SELECT NULL FROM test_01344 WHERE ignore(x, lcm(NULL, 1048576), -2) = NULL;
+
+DROP TABLE test_01344;
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 9d9138a9bd9..bf9ef37e2b7 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -12,6 +12,7 @@ v21.2.5.5-stable 2021-03-02
 v21.2.4.6-stable 2021-02-20
 v21.2.3.15-stable 2021-02-14
 v21.2.2.8-stable 2021-02-07
+v21.1.9.41-stable 2021-04-13
 v21.1.8.30-stable 2021-04-07
 v21.1.7.1-stable 2021-03-15
 v21.1.6.13-stable 2021-03-02