diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 816bdfd4f31..9a719a205d4 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -23,6 +23,10 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Merge sync PR + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py --merge || : - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 74ce8452de8..0eac9a9a722 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -157,16 +157,25 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }} - needs: [Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() }} + needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + - name: Check sync status + if: ${{ github.event_name == 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py --status - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + - name: Auto merge if approved + if: ${{ github.event_name != 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --check-approved diff --git a/base/base/scope_guard.h b/base/base/scope_guard.h index 03670792d59..e6789c5cb1b 100644 --- a/base/base/scope_guard.h +++ b/base/base/scope_guard.h @@ -29,11 +29,13 @@ public: requires std::is_convertible_v constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations) { - if (this != &src) + if constexpr (std::is_same_v) { - invoke(); - function = src.release(); + if (this == &src) + return *this; } + invoke(); + function = src.release(); return *this; } diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 023f257253a..c31d2fd7f39 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -26,6 +26,11 @@ 200 + + + + + diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f87b6144deb..eb93d9cda5b 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -42,6 +42,19 @@ Type: UInt32 Default: 1 +## auth_use_forwarded_address + +Use originating address for authentication for clients connected through proxy. + +:::note +This setting should be used with extra caution since forwarded address can be easily spoofed - server accepting such authentication should not be accessed directly but rather exclusively through a trusted proxy. +::: + +Type: Bool + +Default: 0 + + ## background_buffer_flush_schedule_pool_size The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in the background. 
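The `base/base/scope_guard.h` hunk above narrows the self-assignment check in the move assignment: the converting overload can receive a guard holding a different callable type, which can never alias `*this`, so the `this == &src` check is now wrapped in `if constexpr (std::is_same_v<...>)` and the invoke/release path otherwise runs unconditionally. A minimal standalone sketch of these move-assignment semantics (simplified to a single, non-template callable type; illustrative only, not the actual header):

```cpp
#include <cstdio>
#include <functional>
#include <utility>

// Simplified scope guard: the previously held action always runs before the
// new one is adopted, except in a genuine self-assignment.
class ScopeGuard
{
public:
    explicit ScopeGuard(std::function<void()> f) : func(std::move(f)) {}

    ScopeGuard(ScopeGuard && src) noexcept : func(src.release()) {}

    ScopeGuard & operator=(ScopeGuard && src) noexcept
    {
        // In the real header this check only exists when both guards hold the
        // same callable type; a guard of a different type cannot be *this.
        if (this == &src)
            return *this;
        invoke();               // run the old action before adopting the new one
        func = src.release();
        return *this;
    }

    ~ScopeGuard() { invoke(); }

private:
    void invoke()
    {
        if (func)
        {
            func();
            func = nullptr;
        }
    }

    std::function<void()> release()
    {
        auto f = std::move(func);
        func = nullptr;
        return f;
    }

    std::function<void()> func;
};

int main()
{
    ScopeGuard g1([] { std::puts("first action"); });
    ScopeGuard g2([] { std::puts("second action"); });
    g1 = std::move(g2);   // "first action" runs here, g1 adopts the second action
    return 0;             // "second action" runs when g1 is destroyed
}
```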
diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md new file mode 100644 index 00000000000..b68a5906abf --- /dev/null +++ b/docs/en/operations/settings/composable-protocols.md @@ -0,0 +1,155 @@ +--- +slug: /en/operations/settings/composable-protocols +sidebar_position: 64 +sidebar_label: Composable Protocols +--- + +# Composable Protocols + +Composable protocols allows more flexible configuration of TCP access to the ClickHouse server. This configuration can co-exist with or replace conventional configuration. + +## Composable protocols section is denoted as `protocols` in configuration xml +**Example:** +``` xml + + + +``` + +## Basic modules define protocol layers +**Example:** +``` xml + + + + + http + + + +``` +where: +- `plain_http` - name which can be referred by another layer +- `type` - denotes protocol handler which will be instantiated to process data, set of protocol handlers is predefined: + * `tcp` - native clickhouse protocol handler + * `http` - http clickhouse protocol handler + * `tls` - TLS encryption layer + * `proxy1` - PROXYv1 layer + * `mysql` - MySQL compatibility protocol handler + * `postgres` - PostgreSQL compatibility protocol handler + * `prometheus` - Prometheus protocol handler + * `interserver` - clickhouse interserver handler + +:::note +`gRPC` protocol handler is not implemented for `Composable protocols` +::: + +## Endpoint (i.e. listening port) is denoted by `` and (optional) `` tags +**Example:** +``` xml + + + + + http + + 127.0.0.1 + 8123 + + + + +``` +If `` is omitted, then `` from root config is used. + +## Layers sequence is defined by `` tag, referencing another module +**Example:** definition for HTTPS protocol +``` xml + + + + + http + + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Endpoint can be attached to any layer +**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Additional endpoints can be defined by referencing any module and omitting `` tag +**Example:** `another_http` endpoint is defined for `plain_http` module +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + + plain_http + 127.0.0.1 + 8223 + + + +``` + +## Some modules can contain specific for its layer parameters +**Example:** for TLS layer private key (`privateKeyFile`) and certificate files (`certificateFile`) can be specified +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + another_server.key + another_server.crt + + + +``` diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index c73fd26683e..d9c2a61cb81 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -9,6 +9,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time of the entry. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. - `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. 
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause). @@ -67,6 +68,7 @@ Row 2: ────── hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 +event_time: 2023-08-19 11:08:56 event_time_microseconds: 2023-08-19 11:08:56.916192 id: e5b74ecb-f6f1-426a-80be-872f90043885 name: Disk('backups_disk', '1.zip') diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index f7e56e73520..df266b224fb 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -970,7 +970,7 @@ If the haystack or the LIKE expression are not valid UTF-8, the behavior is unde No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. -To match against literal `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. +To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`. Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`. @@ -1768,4 +1768,4 @@ SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world'); ```response null -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 941dc000a02..ce5cecf6fd6 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -133,8 +133,6 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same indices and projections. - Both tables must have the same storage policy. -If both tables have the same storage policy, use hardlink to attach partition. Otherwise, use copying the data to attach partition. - ## REPLACE PARTITION ``` sql diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 053b29481ce..090dda383b3 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -687,7 +687,11 @@ bool Client::processWithFuzzing(const String & full_query) try { const char * begin = full_query.data(); - orig_ast = parseQuery(begin, begin + full_query.size(), true); + orig_ast = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + /*is_interactive=*/ is_interactive, + /*ignore_error=*/ ignore_error); } catch (const Exception & e) { diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 20c1a0ad4a8..f2ef3857d63 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -46,12 +46,12 @@ INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); * * The following steps are performed: * - * - copying the binary to binary directory (/usr/bin). + * - copying the binary to binary directory (/usr/bin/) * - creation of symlinks for tools. * - creation of clickhouse user and group. - * - creation of config directory (/etc/clickhouse-server). + * - creation of config directory (/etc/clickhouse-server/). 
* - creation of default configuration files. - * - creation of a directory for logs (/var/log/clickhouse-server). + * - creation of a directory for logs (/var/log/clickhouse-server/). * - creation of a data directory if not exists. * - setting a password for default user. * - choose an option to listen connections. @@ -226,7 +226,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "where to install binaries") +#else ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "where to install configs") ("log-path", po::value()->default_value("var/log/clickhouse-server"), "where to create log directory") ("data-path", po::value()->default_value("var/lib/clickhouse"), "directory for data") @@ -1216,7 +1221,12 @@ int mainEntryClickHouseStart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") @@ -1332,7 +1342,12 @@ int mainEntryClickHouseRestart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 72920fbd855..a0aa6d085d8 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -413,8 +413,20 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config(), "localhost"); + + ReadBuffer * in; + auto table_file = config().getString("table-file", "-"); + if (table_file == "-" || table_file == "stdin") + { + in = &std_in; + } + else + { + input = std::make_unique(table_file); + in = input.get(); + } connection = LocalConnection::createConnection( - connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); + connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name); } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ca0ce513b09..9b67aab02d4 100644 --- a/programs/local/LocalServer.h +++ 
b/programs/local/LocalServer.h @@ -65,6 +65,8 @@ private: std::optional status; std::optional temporary_directory_to_delete; + + std::unique_ptr input; }; } diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d0a5656d334..3e2a2055fdb 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB @@ -61,47 +62,7 @@ const QueryTreeNodePtr & getEquiArgument(const QueryTreeNodePtr & cond, size_t i return func->getArguments().getNodes()[index]; } - -/// Check that node has only one source and return it. -/// {_, false} - multiple sources -/// {nullptr, true} - no sources -/// {source, true} - single source -std::pair getExpressionSource(const QueryTreeNodePtr & node) -{ - if (const auto * column = node->as()) - { - auto source = column->getColumnSourceOrNull(); - if (!source) - return {nullptr, false}; - return {source.get(), true}; - } - - if (const auto * func = node->as()) - { - const IQueryTreeNode * source = nullptr; - const auto & args = func->getArguments().getNodes(); - for (const auto & arg : args) - { - auto [arg_source, is_ok] = getExpressionSource(arg); - if (!is_ok) - return {nullptr, false}; - - if (!source) - source = arg_source; - else if (arg_source && !source->isEqual(*arg_source)) - return {nullptr, false}; - } - return {source, true}; - - } - - if (node->as()) - return {nullptr, true}; - - return {nullptr, false}; -} - -bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr & table_expression) +bool findInTableExpression(const QueryTreeNodePtr & source, const QueryTreeNodePtr & table_expression) { if (!source) return true; @@ -115,7 +76,6 @@ bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr || findInTableExpression(source, join_node->getRightTableExpression()); } - return false; } @@ -169,10 +129,10 @@ public: auto left_src = getExpressionSource(lhs_equi_argument); auto right_src = getExpressionSource(rhs_equi_argument); - if (left_src.second && right_src.second && left_src.first && right_src.first) + if (left_src && right_src) { - if ((findInTableExpression(left_src.first, left_table) && findInTableExpression(right_src.first, right_table)) || - (findInTableExpression(left_src.first, right_table) && findInTableExpression(right_src.first, left_table))) + if ((findInTableExpression(left_src, left_table) && findInTableExpression(right_src, right_table)) || + (findInTableExpression(left_src, right_table) && findInTableExpression(right_src, left_table))) { can_convert_cross_to_inner = true; continue; diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5f08bb9035e..b798a256ebd 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -25,8 +25,9 @@ class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWi public: using Base = InDepthQueryTreeVisitorWithContext; - explicit JoinOnLogicalExpressionOptimizerVisitor(ContextPtr context) + explicit JoinOnLogicalExpressionOptimizerVisitor(const JoinNode * join_node_, ContextPtr context) : Base(std::move(context)) + , join_node(join_node_) {} void enterImpl(QueryTreeNodePtr & node) @@ -55,10 +56,11 @@ public: } private: + const JoinNode * join_node; bool need_rerun_resolve = false; /// Returns true if type of some operand is changed and parent 
function needs to be re-resolved - static bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) { auto & function_node = node->as(); chassert(function_node.getFunctionName() == "or"); @@ -93,6 +95,21 @@ private: const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { + const auto & argument_nodes = argument_function->getArguments().getNodes(); + if (argument_nodes.size() != 2) + continue; + /// We can rewrite to a <=> b only if we are joining on a and b, + /// because the function is not yet implemented for other cases. + auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + continue; + const auto & lhs_join = *join_node->getLeftTableExpression(); + const auto & rhs_join = *join_node->getRightTableExpression(); + bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); + if (!arguments_from_both_sides) + continue; equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") @@ -231,7 +248,7 @@ public: /// Operator <=> is not supported outside of JOIN ON section if (join_node->hasJoinExpression()) { - JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(getContext()); + JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(join_node, getContext()); join_on_visitor.visit(join_node->getJoinExpression()); } return; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 2882c4e0c02..4299050a094 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -760,6 +760,54 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +/** Returns: + * {_, false} - multiple sources + * {nullptr, true} - no sources (for constants) + * {source, true} - single source + */ +std::pair getExpressionSourceImpl(const QueryTreeNodePtr & node) +{ + if (const auto * column = node->as()) + { + auto source = column->getColumnSourceOrNull(); + if (!source) + return {nullptr, false}; + return {source, true}; + } + + if (const auto * func = node->as()) + { + QueryTreeNodePtr source = nullptr; + const auto & args = func->getArguments().getNodes(); + for (const auto & arg : args) + { + auto [arg_source, is_ok] = getExpressionSourceImpl(arg); + if (!is_ok) + return {nullptr, false}; + + if (!source) + source = arg_source; + else if (arg_source && !source->isEqual(*arg_source)) + return {nullptr, false}; + } + return {source, true}; + + } + + if (node->as()) + return {nullptr, true}; + + return {nullptr, false}; +} + +QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node) +{ + auto [source, is_ok] = getExpressionSourceImpl(node); + if (!is_ok) + return nullptr; + return source; +} + QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context) { const auto & storage_snapshot = table_node->as()->getStorageSnapshot(); diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 1b4a7d5ef3c..93d439453c6 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -105,6 +105,9 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, 
DataTypePtr result_type, ContextPtr context); +/// Checks that node has only one source and returns it +QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); + /// Build subquery which we execute for `IN table` function. QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context); diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 2efb6bf84ad..58dac0c0843 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -125,7 +125,7 @@ BackupEntries BackupEntriesCollector::run() = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements); + renaming_map = BackupUtils::makeRenamingMap(backup_query_elements); /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards//replicas//". calculateRootPathInBackup(); @@ -570,17 +570,16 @@ std::vector> BackupEntriesCollector::findTablesInD checkIsQueryCancelled(); - auto filter_by_table_name = [my_database_info = &database_info](const String & table_name) + auto filter_by_table_name = [&](const String & table_name) { - /// We skip inner tables of materialized views. - if (table_name.starts_with(".inner_id.")) + if (BackupUtils::isInnerTable(database_name, table_name)) return false; - if (my_database_info->tables.contains(table_name)) + if (database_info.tables.contains(table_name)) return true; - if (my_database_info->all_tables) - return !my_database_info->except_table_names.contains(table_name); + if (database_info.all_tables) + return !database_info.except_table_names.contains(table_name); return false; }; diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 6efca053f05..fb448fb64ad 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -8,10 +8,10 @@ #include -namespace DB +namespace DB::BackupUtils { -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements) +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements) { DDLRenamingMap map; @@ -120,4 +120,15 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context); } +bool isInnerTable(const QualifiedTableName & table_name) +{ + return isInnerTable(table_name.database, table_name.table); +} + +bool isInnerTable(const String & /* database_name */, const String & table_name) +{ + /// We skip inner tables of materialized views. + return table_name.starts_with(".inner.") || table_name.starts_with(".inner_id."); +} + } diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 7976de818e2..ba889eccc48 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -9,9 +9,13 @@ namespace DB class IBackup; class AccessRightsElements; class DDLRenamingMap; +struct QualifiedTableName; + +namespace BackupUtils +{ /// Initializes a DDLRenamingMap from a BACKUP or RESTORE query. -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements); +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements); /// Returns access required to execute BACKUP query. 
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements); @@ -20,4 +24,10 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); +/// Returns true if this table should be skipped while making a backup because it's an inner table. +bool isInnerTable(const QualifiedTableName & table_name); +bool isInnerTable(const String & database_name, const String & table_name); + +} + } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 9a3e1052e0b..bb04c72dd3c 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -564,7 +564,7 @@ void BackupsWorker::doBackup( /// Checks access rights if this is not ON CLUSTER query. /// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.) - auto required_access = getRequiredAccessToBackup(backup_query->elements); + auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); if (!on_cluster) context->checkAccess(required_access); diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index d8383fdbb1d..ec0a717cfcd 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -124,7 +124,7 @@ void RestorerFromBackup::run(Mode mode) restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); + renaming_map = BackupUtils::makeRenamingMap(restore_query_elements); /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". 
findRootPathsInBackup(); @@ -346,12 +346,12 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() { case ASTBackupQuery::ElementType::TABLE: { - findTableInBackup({element.database_name, element.table_name}, element.partitions); + findTableInBackup({element.database_name, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { - findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); + findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: @@ -370,14 +370,14 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() LOG_INFO(log, "Will restore {} databases and {} tables", getNumDatabases(), getNumTables()); } -void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { schedule( - [this, table_name_in_backup, partitions]() { findTableInBackupImpl(table_name_in_backup, partitions); }, + [this, table_name_in_backup, skip_if_inner_table, partitions]() { findTableInBackupImpl(table_name_in_backup, skip_if_inner_table, partitions); }, "Restore_FindTbl"); } -void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); @@ -422,6 +422,10 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ = *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); } + QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); + if (skip_if_inner_table && BackupUtils::isInnerTable(table_name)) + return; + auto read_buffer = backup->readFile(*metadata_path); String create_query_str; readStringUntilEOF(create_query_str, *read_buffer); @@ -432,8 +436,6 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); String create_table_query_str = serializeAST(*create_table_query); - QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); - bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); bool table_has_data = backup->hasFiles(data_path_in_backup); @@ -568,7 +570,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i if (except_table_names.contains({database_name_in_backup, table_name_in_backup})) continue; - findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); + findTableInBackup({database_name_in_backup, table_name_in_backup}, /* skip_if_inner_table= */ true, /* partitions= */ {}); } } @@ -767,7 +769,7 @@ void RestorerFromBackup::checkDatabase(const String & database_name) ASTPtr existing_database_def = 
database->getCreateDatabaseQuery(); ASTPtr database_def_from_backup = database_info.create_database_query; - if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, @@ -938,7 +940,7 @@ void RestorerFromBackup::checkTable(const QualifiedTableName & table_name) { ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context); ASTPtr table_def_from_backup = table_info.create_table_query; - if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 238569ac755..7b36eea0ba0 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -92,8 +92,8 @@ private: void findRootPathsInBackup(); void findDatabasesAndTablesInBackup(); - void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); - void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); + void findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); + void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); void findDatabaseInBackupImpl(const String & database_name_in_backup, const std::set & except_table_names); void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f37b391eb66..d243a1d74b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -329,12 +329,11 @@ void ClientBase::setupSignalHandler() } -ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const +ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error) { std::unique_ptr parser; ASTPtr res; - const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; if (!allow_multi_statements) @@ -343,11 +342,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); else if (dialect == Dialect::prql) parser = std::make_unique(max_length, settings.max_parser_depth, settings.max_parser_backtracks); else - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { @@ -916,7 +915,11 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// Some parts of a query (result output and 
formatting) are executed /// client-side. Thus we need to parse the query. const char * begin = full_query.data(); - auto parsed_query = parseQuery(begin, begin + full_query.size(), false); + auto parsed_query = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ false, + is_interactive, + ignore_error); if (!parsed_query) return; @@ -2061,7 +2064,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; // Remove leading empty newlines and other whitespace, because they - // are annoying to filter in the query log. This is mostly relevant for + // are annoying to filter in query log. This is mostly relevant for // the tests. while (this_query_begin < all_queries_end && isWhitespaceASCII(*this_query_begin)) ++this_query_begin; @@ -2089,9 +2092,13 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( this_query_end = this_query_begin; try { - parsed_query = parseQuery(this_query_end, all_queries_end, true); + parsed_query = parseQuery(this_query_end, all_queries_end, + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + is_interactive, + ignore_error); } - catch (const Exception & e) + catch (Exception & e) { current_exception.reset(e.clone()); return MultiQueryProcessingStage::PARSING_EXCEPTION; @@ -2116,9 +2123,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // INSERT queries may have the inserted data in the query text // that follow the query itself, e.g. "insert into t format CSV 1;2". // They need special handling. First of all, here we find where the - // inserted data ends. In multi-query mode, it is delimited by a + // inserted data ends. In multy-query mode, it is delimited by a // newline. - // The VALUES format needs even more handling - we also allow the + // The VALUES format needs even more handling -- we also allow the // data to be delimited by semicolon. This case is handled later by // the format parser itself. // We can't do multiline INSERTs with inline data, because most diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 43c369422fe..710a72a3238 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -73,6 +73,7 @@ public: void init(int argc, char ** argv); std::vector getAllRegisteredNames() const override { return cmd_options; } + static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error); protected: void runInteractive(); @@ -98,7 +99,6 @@ protected: ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); - ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); bool executeMultiQuery(const String & all_queries_text); diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b1d29b34ffc..b0862696295 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,14 +1,11 @@ #include "ClientBaseHelpers.h" + #include #include -#include -#include +#include #include -#include - - namespace DB { @@ -99,102 +96,77 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; - /// The `colors` array maps to a Unicode code point position in a string into a color. 
- /// A color is set for every position individually (not for a range). + static const std::unordered_map token_to_color + = {{TokenType::Whitespace, Replxx::Color::DEFAULT}, + {TokenType::Comment, Replxx::Color::GRAY}, + {TokenType::BareWord, Replxx::Color::DEFAULT}, + {TokenType::Number, Replxx::Color::GREEN}, + {TokenType::StringLiteral, Replxx::Color::CYAN}, + {TokenType::QuotedIdentifier, Replxx::Color::MAGENTA}, + {TokenType::OpeningRoundBracket, Replxx::Color::BROWN}, + {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, + {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, + {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, + {TokenType::DoubleColon, Replxx::Color::BROWN}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - /// Empty input. - if (colors.empty()) - return; + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::HereDoc, Replxx::Color::CYAN}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Spaceship, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::DoubleAt, Replxx::Color::MAGENTA}, - /// The colors should be legible (and look gorgeous) in both dark and light themes. - /// When modifying this, check it in both themes. 
+ {TokenType::EndOfStream, Replxx::Color::DEFAULT}, - static const std::unordered_map type_to_color = - { - {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, - {Highlight::identifier, Replxx::Color::CYAN}, - {Highlight::function, Replxx::Color::BROWN}, - {Highlight::alias, replxx::color::rgb666(0, 4, 4)}, - {Highlight::substitution, Replxx::Color::MAGENTA}, - {Highlight::number, replxx::color::rgb666(0, 4, 0)}, - {Highlight::string, Replxx::Color::GREEN}, - }; + {TokenType::Error, Replxx::Color::RED}, + {TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, + {TokenType::ErrorWrongNumber, Replxx::Color::RED}, + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; - /// We set reasonably small limits for size/depth, because we don't want the CLI to be slow. - /// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway. - - const char * begin = query.data(); - const char * end = begin + query.size(); - Tokens tokens(begin, end, 1000, true); - IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); - Expected expected; - - /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. - /// Only normal ClickHouse SQL queries are highlighted. - - /// Currently we highlight only the first query in the multi-query mode. - - ParserQuery parser(end); - ASTPtr ast; - bool parse_res = false; - - try - { - parse_res = parser.parse(token_iterator, ast, expected); - } - catch (...) - { - /// Skip highlighting in the case of exceptions during parsing. - /// It is ok to ignore unknown exceptions here. - return; - } + const Replxx::Color unknown_token_color = Replxx::Color::RED; + Lexer lexer(query.data(), query.data() + query.size()); size_t pos = 0; - const char * prev = begin; - for (const auto & range : expected.highlights) + + for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) { - auto it = type_to_color.find(range.highlight); - if (it != type_to_color.end()) + if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) + ReplxxLineReader::setLastIsDelimiter(true); + else if (token.type != TokenType::Whitespace) + ReplxxLineReader::setLastIsDelimiter(false); + + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) { - /// We have to map from byte positions to Unicode positions. - pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); - size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); - - for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) - colors[pos + code_point_index] = it->second; - - pos += utf8_len; - prev = range.end; + if (token_to_color.find(token.type) != token_to_color.end()) + colors[pos + code_point_index] = token_to_color.at(token.type); + else + colors[pos + code_point_index] = unknown_token_color; } - } - Token last_token = token_iterator.max(); - /// Raw data in INSERT queries, which is not necessarily tokenized. - const char * insert_data = ast ? getInsertData(ast) : nullptr; - - /// Highlight the last error in red. 
If the parser failed or the lexer found an invalid token, - /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) - if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) - && !(insert_data && expected.max_parsed_pos >= insert_data) - && expected.max_parsed_pos >= prev) - { - pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); - - if (pos >= colors.size()) - pos = colors.size() - 1; - - colors[pos] = Replxx::Color::BRIGHTRED; - } - - /// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it. - if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter - || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. - { - ReplxxLineReader::setLastIsDelimiter(true); - } - else if (last_token.type != TokenType::Whitespace) - { - ReplxxLineReader::setLastIsDelimiter(false); + pos += utf8_len; } } #endif diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index afcaa4d6098..c7494e31605 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -1,11 +1,18 @@ #include "LocalConnection.h" +#include +#include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include #include @@ -22,12 +29,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) +LocalConnection::LocalConnection(ContextPtr context_, ReadBuffer * in_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) , send_profile_events(send_profile_events_) , server_display_name(server_display_name_) + , in(in_) { /// Authenticate and create a context to execute queries. 
session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -130,6 +138,57 @@ void LocalConnection::sendQuery( next_packet_type.reset(); + /// Prepare input() function + query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); + + auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); + Block sample = metadata_snapshot->getSampleBlock(); + + next_packet_type = Protocol::Server::Data; + state->block = sample; + + String current_format = "Values"; + const char * begin = state->query.data(); + auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(), + context->getSettingsRef(), + /*allow_multi_statements=*/ false, + /*is_interactive=*/ false, + /*ignore_error=*/ false); + if (const auto * insert = parsed_query->as()) + { + if (!insert->format.empty()) + current_format = insert->format; + } + + auto source = context->getInputFormat(current_format, *in, sample, context->getSettingsRef().max_insert_block_size); + Pipe pipe(source); + + auto columns_description = metadata_snapshot->getColumns(); + if (columns_description.hasDefaults()) + { + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *source, context); + }); + } + + state->input_pipeline = std::make_unique(std::move(pipe)); + state->input_pipeline_executor = std::make_unique(*state->input_pipeline); + + }); + query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); + + Block block; + state->input_pipeline_executor->pull(block); + return block; + }); + try { state->io = executeQuery(state->query, query_context, QueryFlags{}, state->stage).second; @@ -537,11 +596,12 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & ServerConnectionPtr LocalConnection::createConnection( const ConnectionParameters &, ContextPtr current_context, + ReadBuffer * in, bool send_progress, bool send_profile_events, const String & server_display_name) { - return std::make_unique(current_context, send_progress, send_profile_events, server_display_name); + return std::make_unique(current_context, in, send_progress, send_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9c2d0a81d8d..a23450709ad 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -15,6 +15,8 @@ namespace DB class PullingAsyncPipelineExecutor; class PushingAsyncPipelineExecutor; class PushingPipelineExecutor; +class QueryPipeline; +class ReadBuffer; /// State of query processing. struct LocalQueryState @@ -31,6 +33,10 @@ struct LocalQueryState std::unique_ptr executor; std::unique_ptr pushing_executor; std::unique_ptr pushing_async_executor; + /// For sending data for input() function. 
+ std::unique_ptr input_pipeline; + std::unique_ptr input_pipeline_executor; + InternalProfileEventsQueuePtr profile_queue; std::unique_ptr exception; @@ -64,7 +70,11 @@ class LocalConnection : public IServerConnection, WithContext { public: explicit LocalConnection( - ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = ""); + ContextPtr context_, + ReadBuffer * in_, + bool send_progress_, + bool send_profile_events_, + const String & server_display_name_); ~LocalConnection() override; @@ -73,6 +83,7 @@ public: static ServerConnectionPtr createConnection( const ConnectionParameters & connection_parameters, ContextPtr current_context, + ReadBuffer * in = nullptr, bool send_progress = false, bool send_profile_events = false, const String & server_display_name = ""); @@ -158,5 +169,7 @@ private: String current_database; ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; + + ReadBuffer * in; }; } diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 4634eae7759..48bb510e589 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -403,7 +403,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) std::string ZooKeeperSetACLRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const @@ -457,7 +457,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) std::string ZooKeeperCheckRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperErrorResponse::readImpl(ReadBuffer & in) diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index a5c1d21eda6..1d9830505f8 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -2,6 +2,7 @@ #include #include +#include namespace Coordination @@ -64,3 +65,12 @@ static constexpr int32_t DEFAULT_OPERATION_TIMEOUT_MS = 10000; static constexpr int32_t DEFAULT_CONNECTION_TIMEOUT_MS = 1000; } + +/// This is used by fmt::format to print OpNum as strings. +/// All OpNum values should be in range [min, max] to be printed. +template <> +struct magic_enum::customize::enum_range +{ + static constexpr int min = -100; + static constexpr int max = 1000; +}; diff --git a/src/Core/Field.h b/src/Core/Field.h index eb01be6c43d..4424d669c4d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -898,11 +898,13 @@ NearestFieldType> & Field::get() template auto & Field::safeGet() { - const Types::Which requested = TypeToEnum>>::value; + const Types::Which target = TypeToEnum>>::value; - if (which != requested) + /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet(). 
+ if (target != which + && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), requested); + "Bad get: has {}, requested {}", getTypeName(), target); return get(); } diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index eb4ab2d4438..bdfb7207724 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -89,7 +89,8 @@ static std::map sett {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - }}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, @@ -129,7 +130,6 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 09e9364a3f1..06e6e1f8fc8 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2135,13 +2135,6 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere } } - // std::cerr << "Allowed " << conjunction.allowed.size() << std::endl; - // for (const auto & node : conjunction.allowed) - // std::cerr << node->result_name << std::endl; - // std::cerr << "Rejected " << conjunction.rejected.size() << std::endl; - // for (const auto & node : conjunction.rejected) - // std::cerr << node->result_name << std::endl; - return conjunction; } @@ -2170,7 +2163,7 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt /// /// Result actions add single column with conjunction result (it is always first in outputs). /// No other columns are added or removed. 
-ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) +ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) { if (conjunction.empty()) return nullptr; @@ -2265,9 +2258,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio return actions; } -ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( +ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs) { @@ -2321,16 +2314,232 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } } - auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs); + auto actions = createActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) return nullptr; /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(conjunction.rejected), predicate, removes_filter); - if (conjunction.rejected.empty()) + return actions; +} + +ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const Block & left_stream_header, + const Names & right_stream_available_columns_to_push_down, + const Block & right_stream_header, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column) +{ + Node * predicate = const_cast(tryFindInOutputs(filter_name)); + if (!predicate) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Output nodes for ActionsDAG do not contain filter column name {}. DAG:\n{}", + filter_name, + dumpDAG()); + + /// If condition is constant let's do nothing. + /// It means there is nothing to push down or optimization was already applied. 
+ if (predicate->type == ActionType::COLUMN) + return {}; + + auto get_input_nodes = [this](const Names & inputs_names) + { + std::unordered_set allowed_nodes; + + std::unordered_map> inputs_map; + for (const auto & input_node : inputs) + inputs_map[input_node->result_name].emplace_back(input_node); + + for (const auto & name : inputs_names) + { + auto & inputs_list = inputs_map[name]; + if (inputs_list.empty()) + continue; + + allowed_nodes.emplace(inputs_list.front()); + inputs_list.pop_front(); + } + + return allowed_nodes; + }; + + auto left_stream_allowed_nodes = get_input_nodes(left_stream_available_columns_to_push_down); + auto right_stream_allowed_nodes = get_input_nodes(right_stream_available_columns_to_push_down); + auto both_streams_allowed_nodes = get_input_nodes(equivalent_columns_to_push_down); + + auto left_stream_push_down_conjunctions = getConjunctionNodes(predicate, left_stream_allowed_nodes); + auto right_stream_push_down_conjunctions = getConjunctionNodes(predicate, right_stream_allowed_nodes); + auto both_streams_push_down_conjunctions = getConjunctionNodes(predicate, both_streams_allowed_nodes); + + NodeRawConstPtrs left_stream_allowed_conjunctions = std::move(left_stream_push_down_conjunctions.allowed); + NodeRawConstPtrs right_stream_allowed_conjunctions = std::move(right_stream_push_down_conjunctions.allowed); + + std::unordered_set left_stream_allowed_conjunctions_set(left_stream_allowed_conjunctions.begin(), left_stream_allowed_conjunctions.end()); + std::unordered_set right_stream_allowed_conjunctions_set(right_stream_allowed_conjunctions.begin(), right_stream_allowed_conjunctions.end()); + + for (const auto * both_streams_push_down_allowed_conjunction_node : both_streams_push_down_conjunctions.allowed) + { + if (!left_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + left_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + + if (!right_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + right_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + } + + std::unordered_set rejected_conjunctions_set; + rejected_conjunctions_set.insert(left_stream_push_down_conjunctions.rejected.begin(), left_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(right_stream_push_down_conjunctions.rejected.begin(), right_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(both_streams_push_down_conjunctions.rejected.begin(), both_streams_push_down_conjunctions.rejected.end()); + + for (const auto & left_stream_allowed_conjunction : left_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(left_stream_allowed_conjunction); + + for (const auto & right_stream_allowed_conjunction : right_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(right_stream_allowed_conjunction); + + NodeRawConstPtrs rejected_conjunctions(rejected_conjunctions_set.begin(), rejected_conjunctions_set.end()); + + if (rejected_conjunctions.size() == 1) + { + chassert(rejected_conjunctions.front()->result_type); + + bool left_stream_push_constant = !left_stream_allowed_conjunctions.empty() && left_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + bool right_stream_push_constant = !right_stream_allowed_conjunctions.empty() && right_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + + if ((left_stream_push_constant || right_stream_push_constant) && 
!rejected_conjunctions.front()->result_type->equals(*predicate->result_type)) + { + /// No further optimization can be done + return {}; + } + } + + auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_header.getColumnsWithTypeAndName()); + auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_header.getColumnsWithTypeAndName()); + + auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, + const Block & stream_header, + const std::unordered_map & columns_to_replace) + { + auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); + chassert(updated_filter->getOutputs().size() == 1); + + /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from + * actual input column. It is necessary because after filter step, filter column became constant column with value 1, and + * not all JOIN algorithms properly work with constants. + * + * Example: SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; + */ + const auto * stream_filter_node = updated_filter->getOutputs()[0]; + if (stream_header.has(stream_filter_node->result_name)) + { + const auto & alias_node = updated_filter->addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); + updated_filter->getOutputs()[0] = &alias_node; + } + + std::unordered_map> updated_filter_inputs; + + for (const auto & input : updated_filter->getInputs()) + updated_filter_inputs[input->result_name].push_back(input); + + for (const auto & input : filter->getInputs()) + { + if (updated_filter_inputs.contains(input->result_name)) + continue; + + const Node * updated_filter_input_node = nullptr; + + auto it = columns_to_replace.find(input->result_name); + if (it != columns_to_replace.end()) + updated_filter_input_node = &updated_filter->addInput(it->second); + else + updated_filter_input_node = &updated_filter->addInput({input->column, input->result_type, input->result_name}); + + updated_filter_inputs[input->result_name].push_back(updated_filter_input_node); + } + + for (const auto & input_column : stream_header.getColumnsWithTypeAndName()) + { + const Node * input; + auto & list = updated_filter_inputs[input_column.name]; + if (list.empty()) + { + input = &updated_filter->addInput(input_column); + } + else + { + input = list.front(); + list.pop_front(); + } + + if (input != updated_filter->getOutputs()[0]) + updated_filter->outputs.push_back(input); + } + + return updated_filter; + }; + + if (left_stream_filter_to_push_down) + left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, + left_stream_header, + equivalent_right_stream_column_to_left_stream_column); + + if (right_stream_filter_to_push_down) + right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, + right_stream_header, + equivalent_left_stream_column_to_right_stream_column); + + /* + * We should check the presence of a split filter column name in stream columns to avoid removing the required column. + * + * Example: + * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in allowed columns to push down, + * so the final split filter is just `a`. + * In this case `a` can be in stream columns but not `and(a, equals(b, c))`. 
+ */ + + bool left_stream_filter_removes_filter = true; + bool right_stream_filter_removes_filter = true; + + if (left_stream_filter_to_push_down) + { + const auto & left_stream_filter_column_name = left_stream_filter_to_push_down->getOutputs()[0]->result_name; + left_stream_filter_removes_filter = !left_stream_header.has(left_stream_filter_column_name); + } + + if (right_stream_filter_to_push_down) + { + const auto & right_stream_filter_column_name = right_stream_filter_to_push_down->getOutputs()[0]->result_name; + right_stream_filter_removes_filter = !right_stream_header.has(right_stream_filter_column_name); + } + + ActionsDAG::ActionsForJOINFilterPushDown result + { + .left_stream_filter_to_push_down = std::move(left_stream_filter_to_push_down), + .left_stream_filter_removes_filter = left_stream_filter_removes_filter, + .right_stream_filter_to_push_down = std::move(right_stream_filter_to_push_down), + .right_stream_filter_removes_filter = right_stream_filter_removes_filter + }; + + if (!result.left_stream_filter_to_push_down && !result.right_stream_filter_to_push_down) + return result; + + /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(rejected_conjunctions), predicate, removes_filter); + + return result; +} + +void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter) +{ + if (rejected_conjunctions.empty()) { /// The whole predicate was split. - if (can_remove_filter) + if (removes_filter) { /// If filter column is not needed, remove it from output nodes. std::erase_if(outputs, [&](const Node * node) { return node == predicate; }); @@ -2362,7 +2571,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( { /// Predicate is conjunction, where both allowed and rejected sets are not empty. - NodeRawConstPtrs new_children = std::move(conjunction.rejected); + NodeRawConstPtrs new_children = std::move(rejected_conjunctions); if (new_children.size() == 1 && new_children.front()->result_type->equals(*predicate->result_type)) { @@ -2403,13 +2612,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( std::unordered_set used_inputs; for (const auto * input : inputs) { - if (can_remove_filter && input == predicate) + if (removes_filter && input == predicate) continue; used_inputs.insert(input); } removeUnusedActions(used_inputs); - return actions; } static bool isColumnSortingPreserved(const ActionsDAG::Node * start_node, const String & sorted_column) @@ -2557,8 +2765,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto input_node_it = node_name_to_input_node_column.find(node->result_name); if (input_node_it != node_name_to_input_node_column.end()) { - result_node = &result_dag->addInput(input_node_it->second); - node_to_result_node.emplace(node, result_node); + auto & result_input = result_inputs[input_node_it->second.name]; + if (!result_input) + result_input = &result_dag->addInput(input_node_it->second); + + node_to_result_node.emplace(node, result_input); nodes_to_process.pop_back(); continue; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 469fe9ea7f1..a8a377866d3 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -372,12 +372,46 @@ public: /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`. /// To avoid it, add inputs from `all_inputs` list, /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order. 
- ActionsDAGPtr cloneActionsForFilterPushDown( + ActionsDAGPtr splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs); + struct ActionsForJOINFilterPushDown + { + ActionsDAGPtr left_stream_filter_to_push_down; + bool left_stream_filter_removes_filter; + ActionsDAGPtr right_stream_filter_to_push_down; + bool right_stream_filter_removes_filter; + }; + + /** Split actions for JOIN filter push down. + * + * @param filter_name - name of filter node in current DAG. + * @param removes_filter - if filter is removed after it is applied. + * @param left_stream_available_columns_to_push_down - columns from left stream that are safe to use in push down conditions + * to left stream. + * @param left_stream_header - left stream header. + * @param right_stream_available_columns_to_push_down - columns from right stream that are safe to use in push down conditions + * to right stream. + * @param right_stream_header - right stream header. + * @param equivalent_columns_to_push_down - columns from left and right streams that are safe to use in push down conditions + * to left and right streams. + * @param equivalent_left_stream_column_to_right_stream_column - equivalent left stream column name to right stream column map. + * @param equivalent_right_stream_column_to_left_stream_column - equivalent right stream column name to left stream column map. + */ + ActionsForJOINFilterPushDown splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const Block & left_stream_header, + const Names & right_stream_available_columns_to_push_down, + const Block & right_stream_header, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column); + bool isSortingPreserved(const Block & input_header, const SortDescription & sort_description, const String & ignore_output_column = "") const; @@ -429,7 +463,9 @@ private: void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + static ActionsDAGPtr createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + + void removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter); }; class FindOriginalNodeForOutputName diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index af6c7cf6234..a22c6e322bf 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -24,6 +24,7 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(), "Time of the entry."}, {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, {"id", std::make_shared(), "Identifier of the backup or restore operation."}, {"name", std::make_shared(), "Name of the backup storage (the contents of the FROM or TO clause)."}, @@ -48,6 +49,7 @@ void 
BackupLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(std::chrono::system_clock::to_time_t(event_time)).toUnderType()); + columns[i++]->insert(std::chrono::system_clock::to_time_t(event_time)); columns[i++]->insert(event_time_usec); columns[i++]->insert(info.id); columns[i++]->insert(info.name); diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 09193a8b5e1..be0416359a1 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 2277e348b0f..49964b5c728 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -601,8 +601,6 @@ public: constexpr const char * getName() const override { return s.data(); } - Highlight highlight() const override { return Highlight::keyword; } - protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dce0bc62d5b..2c8ab65d1fc 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -278,7 +278,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) + if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) return false; @@ -1491,7 +1491,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as(Keyword::AS); - ParserIdentifier id_p(false, Highlight::alias); + ParserIdentifier id_p; bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 6dbb75450ed..b29f5cc4251 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -25,15 +25,12 @@ protected: class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) - : allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) {} - Highlight highlight() const override { return highlight_type; } + explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; - Highlight highlight_type; }; @@ -56,8 +53,8 @@ protected: class ParserCompoundIdentifier : public IParserBase { public: - explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) - : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool 
allow_query_parameter_ = false) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) { } @@ -66,7 +63,6 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; bool allow_query_parameter; - Highlight highlight_type; }; /** *, t.*, db.table.*, COLUMNS('') APPLY(...) or EXCEPT(...) or REPLACE(...) @@ -257,7 +253,6 @@ class ParserNumber : public IParserBase protected: const char * getName() const override { return "number"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::number; } }; /** Unsigned integer, used in right hand side of tuple access operator (x.1). @@ -278,7 +273,6 @@ class ParserStringLiteral : public IParserBase protected: const char * getName() const override { return "string literal"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::string; } }; @@ -391,7 +385,6 @@ class ParserSubstitution : public IParserBase protected: const char * getName() const override { return "substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::substitution; } }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 276b4e82074..05691529f43 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -441,21 +441,6 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } -namespace -{ - /// This wrapper is needed to highlight function names differently. 
- class ParserFunctionName : public IParserBase - { - protected: - const char * getName() const override { return "function name"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - ParserCompoundIdentifier parser(false, true, Highlight::function); - return parser.parse(pos, node, expected); - } - }; -} - enum class Action { @@ -824,7 +809,6 @@ struct ParserExpressionImpl static const Operator finish_between_operator; - ParserFunctionName function_name_parser; ParserCompoundIdentifier identifier_parser{false, true}; ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2375,7 +2359,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserFunctionName().parse(pos, identifier, expected) + if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); @@ -2513,7 +2497,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { if (typeid_cast(layers.back().get()) || typeid_cast(layers.back().get())) { - if (function_name_parser.parse(pos, tmp, expected) + if (identifier_parser.parse(pos, tmp, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); @@ -2645,53 +2629,50 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else + else if (identifier_parser.parse(pos, tmp, expected)) { - old_pos = pos; - if (function_name_parser.parse(pos, tmp, expected) && pos->type == TokenType::OpeningRoundBracket) + if (pos->type == TokenType::OpeningRoundBracket) { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return Action::OPERAND; } - pos = old_pos; - - if (identifier_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - - if (subquery_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - return Action::OPERATOR; - } - - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } else { - return Action::NONE; + layers.back()->pushOperand(std::move(tmp)); } } + else if (substitution_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else if (pos->type == TokenType::OpeningRoundBracket) + { + + if (subquery_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return Action::OPERATOR; + } + + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return Action::NONE; + } return Action::OPERATOR; } diff --git 
a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index eb4ddfa01d2..41981a4bb8a 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -9,7 +9,6 @@ namespace ErrorCodes extern const int TOO_SLOW_PARSING; } - IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -33,26 +32,4 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) return *this; } - -template -static bool intersects(T a_begin, T a_end, T b_begin, T b_end) -{ - return (a_begin <= b_begin && b_begin < a_end) - || (b_begin <= a_begin && a_begin < b_end); -} - - -void Expected::highlight(HighlightedRange range) -{ - auto it = highlights.lower_bound(range); - while (it != highlights.end() && range.begin < it->end) - { - if (intersects(range.begin, range.end, it->begin, it->end)) - it = highlights.erase(it); - else - ++it; - } - highlights.insert(range); -} - } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index f8146c0a4f6..291f8ee7d44 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -22,42 +21,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -enum class Highlight -{ - none = 0, - keyword, - identifier, - function, - alias, - substitution, - number, - string, -}; - -struct HighlightedRange -{ - const char * begin; - const char * end; - Highlight highlight; - - auto operator<=>(const HighlightedRange & other) const - { - return begin <=> other.begin; - } -}; - /** Collects variants, how parser could proceed further at rightmost position. - * Also collects a mapping of parsed ranges for highlighting, - * which is accumulated through the parsing. */ struct Expected { absl::InlinedVector variants; const char * max_parsed_pos = nullptr; - std::set highlights; - /// 'description' should be statically allocated string. ALWAYS_INLINE void add(const char * current_pos, const char * description) { @@ -77,8 +48,6 @@ struct Expected { add(it->begin, description); } - - void highlight(HighlightedRange range); }; @@ -189,14 +158,6 @@ public: return parse(pos, node, expected); } - /** If the parsed fragment should be highlighted in the query editor, - * which type of highlighting to use? 
- */ - virtual Highlight highlight() const - { - return Highlight::none; - } - virtual ~IParser() = default; }; diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 9d39056a8f1..0241250926d 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -10,25 +10,8 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { - const char * begin = pos->begin; bool res = parseImpl(pos, node, expected); - if (res) - { - Highlight type = highlight(); - if (pos->begin > begin && type != Highlight::none) - { - Pos prev_token = pos; - --prev_token; - - HighlightedRange range; - range.begin = begin; - range.end = prev_token->end; - range.highlight = type; - - expected.highlight(range); - } - } - else + if (!res) node = nullptr; return res; }); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 0bbb181b39c..9373e6a1c93 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -40,6 +40,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); + ParserToken s_semicolon(TokenType::Semicolon); ParserIdentifier name_p(true); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserFunction table_function_p{false}; @@ -146,9 +147,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// If VALUES is defined in query, everything except setting will be parsed as data, /// and if values followed by semicolon, the data should be null. - if (pos->type != TokenType::Semicolon) + if (!s_semicolon.checkWithoutMoving(pos, expected)) data = pos->begin; - format_str = "Values"; } else if (s_format.ignore(pos, expected)) diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 13b881635cd..f08d2b978c6 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -210,12 +210,8 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p if (!s_eq.ignore(pos, expected)) return false; - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) 
- else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -276,11 +272,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( } /// Setting - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index eaf020e445b..81660bc4600 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -60,6 +60,21 @@ bool parseDatabaseAndTableAsAST(IParser::Pos & pos, Expected & expected, ASTPtr } +bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) +{ + ParserToken s_dot(TokenType::Dot); + ParserIdentifier identifier_parser; + + ASTPtr database; + database_str = ""; + + if (!identifier_parser.parse(pos, database, expected)) + return false; + + tryGetIdentifierNameInto(database, database_str); + return true; +} + bool parseDatabaseAsAST(IParser::Pos & pos, Expected & expected, ASTPtr & database) { ParserIdentifier identifier_parser(/* allow_query_parameter */true); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 2a6abc23406..51878efa706 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,32 +226,6 @@ std::string getUnmatchedParenthesesErrorMessage( } -static ASTInsertQuery * getInsertAST(const ASTPtr & ast) -{ - /// Either it is INSERT or EXPLAIN INSERT. - if (auto * explain = ast->as()) - { - if (auto explained_query = explain->getExplainedQuery()) - { - return explained_query->as(); - } - } - else - { - return ast->as(); - } - - return nullptr; -} - -const char * getInsertData(const ASTPtr & ast) -{ - if (const ASTInsertQuery * insert = getInsertAST(ast)) - return insert->data; - return nullptr; -} - - ASTPtr tryParseQuery( IParser & parser, const char * & _out_query_end, /* also query begin as input parameter */ @@ -296,11 +270,29 @@ ASTPtr tryParseQuery( if (res && max_parser_depth) res->checkDepth(max_parser_depth); - /// If parsed query ends at data for insertion. Data for insertion could be - /// in any format and not necessary be lexical correct, so we can't perform - /// most of the checks. - if (res && getInsertData(res)) + ASTInsertQuery * insert = nullptr; + if (parse_res) + { + if (auto * explain = res->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + insert = explained_query->as(); + } + } + else + { + insert = res->as(); + } + } + + // If parsed query ends at data for insertion. Data for insertion could be + // in any format and not necessary be lexical correct, so we can't perform + // most of the checks. + if (insert && insert->data) + { return res; + } // More granular checks for queries other than INSERT w/inline data. 
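The inlined EXPLAIN/INSERT unwrapping above lets `tryParseQuery` return as soon as it sees inline INSERT data: everything after `insert->data` is payload in an arbitrary format, so the later lexical checks must not look at it. A standalone illustration of why the checks have to stop at the data pointer (the helper name is made up):

```cpp
// Hypothetical illustration of why lexical checks stop at inline INSERT data.
#include <cassert>
#include <cstddef>
#include <string>

/// Count unmatched single quotes, but only in the SQL prefix of the query:
/// inline INSERT data may legally contain anything, so it must be excluded.
static bool hasUnmatchedQuote(const std::string & query, size_t sql_prefix_len)
{
    size_t quotes = 0;
    for (size_t i = 0; i < sql_prefix_len; ++i)
        if (query[i] == '\'')
            ++quotes;
    return quotes % 2 != 0;
}

int main()
{
    std::string query = "INSERT INTO t FORMAT CSV it's,raw,data";
    size_t data_pos = query.find("it's");              /// where the parser stopped: start of inline data

    assert(!hasUnmatchedQuote(query, data_pos));        /// the SQL prefix is fine
    assert(hasUnmatchedQuote(query, query.size()));     /// checking the payload too would falsely fail
}
```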
/// Lexical error @@ -442,9 +434,11 @@ std::pair splitMultipartQuery( ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); - if (ASTInsertQuery * insert = getInsertAST(ast)) + auto * insert = ast->as(); + + if (insert && insert->data) { - /// Data for INSERT is broken on the new line + /// Data for INSERT is broken on new line pos = insert->data; while (*pos && *pos != '\n') ++pos; diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index 564415d0b85..93c1a465267 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -71,9 +71,4 @@ std::pair splitMultipartQuery( size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); -/** If the query contains raw data part, such as INSERT ... FORMAT ..., return a pointer to it. - * The SQL parser stops at the raw data part, which is parsed by a separate parser. - */ -const char * getInsertData(const ASTPtr & ast); - } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 656b6cdaa6e..02c1c56fae2 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int INCORRECT_QUERY; } namespace @@ -500,7 +501,41 @@ public: ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); private: - using NodeNameAndNodeMinLevel = std::pair; + + class Levels + { + public: + explicit Levels(size_t level) { set(level); } + + void set(size_t level) + { + check(level); + if (level) + mask |= (uint64_t(1) << (level - 1)); + } + + void reset(size_t level) + { + check(level); + if (level) + mask &= ~(uint64_t(1) << (level - 1)); + } + + void add(Levels levels) { mask |= levels.mask; } + + size_t max() const { return 64 - getLeadingZeroBits(mask); } + + private: + uint64_t mask = 0; + + void check(size_t level) + { + if (level > 64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. 
Maximum 64."); + } + }; + + using NodeNameAndNodeMinLevel = std::pair; NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node); @@ -586,11 +621,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column_source->getNodeType() == QueryTreeNodeType::LAMBDA && actions_stack[i].getScopeNode().get() == column_source.get()) { - return {column_node_name, i}; + return {column_node_name, Levels(i)}; } } - return {column_node_name, 0}; + return {column_node_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node) @@ -660,7 +695,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); } - return {constant_node_name, 0}; + return {constant_node_name, Levels(0)}; } @@ -688,7 +723,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_actions_dag = std::make_shared(); actions_stack.emplace_back(lambda_actions_dag, node); - auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression()); + auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); @@ -699,8 +734,9 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi ActionsDAG::NodeRawConstPtrs lambda_children; Names required_column_names = lambda_actions->getRequiredColumns(); - if (level == actions_stack.size() - 1) - --level; + actions_stack.pop_back(); + levels.reset(actions_stack.size()); + size_t level = levels.max(); const auto & lambda_argument_names = lambda_node.getArgumentNames(); @@ -718,7 +754,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_node_name = calculateActionNodeName(node, *planner_context); auto function_capture = std::make_shared( lambda_actions, captured_column_names, lambda_arguments_names_and_types, lambda_node.getExpression()->getResultType(), lambda_expression_node_name); - actions_stack.pop_back(); // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. 
const auto * actions_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture); @@ -735,7 +770,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type); } - return {lambda_node_name, level}; + return {lambda_node_name, levels}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node) @@ -799,7 +834,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); } - return {column.name, 0}; + return {column.name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) @@ -833,7 +868,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi size_t index_hint_function_level = actions_stack.size() - 1; actions_stack[index_hint_function_level].addFunctionIfNecessary(function_node_name, {}, index_hint_function_overload_resolver); - return {function_node_name, index_hint_function_level}; + return {function_node_name, Levels(index_hint_function_level)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) @@ -868,7 +903,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, 0}; + return {function_node_name, Levels(0)}; } const auto & function_arguments = function_node.getArguments().getNodes(); @@ -877,14 +912,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi Names function_arguments_node_names; function_arguments_node_names.reserve(function_arguments_size); - size_t level = 0; + Levels levels(0); for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) { if (in_function_second_argument_node_name_with_level && function_argument_index == 1) { - auto & [node_name, node_min_level] = *in_function_second_argument_node_name_with_level; + auto & [node_name, node_levels] = *in_function_second_argument_node_name_with_level; function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } @@ -892,20 +927,21 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi if (argument->getNodeType() == QueryTreeNodeType::LAMBDA) { - auto [node_name, node_min_level] = visitLambda(argument); + auto [node_name, node_levels] = visitLambda(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } - auto [node_name, node_min_level] = visitImpl(argument); + auto [node_name, node_levels] = visitImpl(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); } ActionsDAG::NodeRawConstPtrs children; children.reserve(function_arguments_size); + size_t level = levels.max(); for (auto & function_argument_node_name : function_arguments_node_names) children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name)); @@ -930,7 +966,7 @@ 
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, level}; + return {function_node_name, levels}; } } diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index e020cd3c4d3..939d0900c86 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -24,6 +24,7 @@ public: void describeActions(FormatSettings & settings) const override; const ActionsDAGPtr & getExpression() const { return actions_dag; } + ActionsDAGPtr & getExpression() { return actions_dag; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index e71bcc5602a..ebf780bb692 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -100,7 +100,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node) return res; } -static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & allowed_inputs, size_t child_idx = 0) +static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -114,14 +114,12 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & al bool removes_filter = filter->removesFilterColumn(); const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - - auto split_filter = expression->cloneActionsForFilterPushDown(filter_column_name, removes_filter, allowed_inputs, all_inputs); - return split_filter; + return expression->splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); } static size_t -tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, - bool can_remove_filter = true, size_t child_idx = 0) +addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, + bool can_remove_filter = true, size_t child_idx = 0, bool update_parent_filter = true) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -134,21 +132,18 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con const auto & filter_column_name = filter->getFilterColumnName(); const auto * filter_node = expression->tryFindInOutputs(filter_column_name); - if (!filter_node && !filter->removesFilterColumn()) + if (update_parent_filter && !filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", filter_column_name, expression->dumpDAG()); - /// Filter column was replaced to constant. - const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); - - /// Add new Filter step before Aggregating. - /// Expression/Filter -> Aggregating -> Something + /// Add new Filter step before Child. 
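The splice performed just below inserts the new filter node between the child step and its input by pre-seeding the new node with a self-pointer and swapping it with the child's edge. A toy version with simplified plan nodes (only the pointer-swap trick matches the diff):

```cpp
// Toy plan nodes; only the pointer-swap splice matches the diff.
#include <cassert>
#include <cstddef>
#include <deque>
#include <string>
#include <utility>
#include <vector>

struct Node
{
    std::string name;
    std::vector<Node *> children;
};

/// Insert a new step between `parent` and its child at `child_idx`.
/// The placeholder self-pointer plus one swap re-links both edges at once.
static Node & insertBelow(std::deque<Node> & nodes, Node & parent, size_t child_idx, std::string step_name)
{
    Node & inserted = nodes.emplace_back();     /// deque keeps references to earlier nodes valid
    inserted.name = std::move(step_name);
    inserted.children.emplace_back(&inserted);  /// placeholder, as in addNewFilterStepOrThrow
    std::swap(inserted.children[0], parent.children[child_idx]);
    return inserted;
}

int main()
{
    std::deque<Node> nodes;
    Node & leaf = nodes.emplace_back(Node{"ReadFromMergeTree", {}});
    Node & join = nodes.emplace_back(Node{"Join", {&leaf}});

    insertBelow(nodes, join, 0, "PushedFilter");

    assert(join.children[0]->name == "PushedFilter");   /// Join -> PushedFilter -> ReadFromMergeTree
    assert(join.children[0]->children[0] == &leaf);
}
```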
+ /// Expression/Filter -> Child -> Something auto & node = nodes.emplace_back(); node.children.emplace_back(&node); std::swap(node.children[0], child_node->children[child_idx]); - /// Expression/Filter -> Aggregating -> Filter -> Something + /// Expression/Filter -> Child -> Filter -> Something /// New filter column is the first one. String split_filter_column_name = split_filter->getOutputs().front()->result_name; @@ -171,12 +166,22 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con ErrorCodes::LOGICAL_ERROR, "We are trying to push down a filter through a step for which we cannot update input stream"); } - if (!filter_node || filter_is_constant) - /// This means that all predicates of filter were pushed down. - /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); - else - filter->updateInputStream(child->getOutputStream()); + if (update_parent_filter) + { + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. + parent = std::make_unique(child->getOutputStream(), expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); + } + } return 3; } @@ -186,7 +191,7 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con bool can_remove_filter = true, size_t child_idx = 0) { if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx)) - return tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); + return addNewFilterStepOrThrow(parent_node, nodes, split_filter, can_remove_filter, child_idx); return 0; } @@ -204,6 +209,204 @@ static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, QueryPlan::N return 0; } +static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, QueryPlanStepPtr & child) +{ + auto & parent = parent_node->step; + auto * filter = assert_cast(parent.get()); + + auto * join = typeid_cast(child.get()); + auto * filled_join = typeid_cast(child.get()); + + if (!join && !filled_join) + return 0; + + /** For equivalent JOIN with condition `ON lhs.x_1 = rhs.y_1 AND lhs.x_2 = rhs.y_2 ...`, we can build equivalent sets of columns and this + * will allow to push conditions that only use columns from equivalent sets to both sides of JOIN, without considering JOIN type. + * + * For example: `FROM lhs INNER JOIN rhs ON lhs.id = rhs.id AND lhs.value = rhs.value` + * In this example columns `id` and `value` from both tables are equivalent. + * + * During filter push down for different JOIN types filter push down logic is different: + * + * 1. For INNER JOIN we can push all valid conditions to both sides of JOIN. We also can push all valid conditions that use columns from + * equivalent sets to both sides of JOIN. + * 2. For LEFT/RIGHT JOIN we can push conditions that use columns from LEFT/RIGHT stream to LEFT/RIGHT JOIN side. We can also push conditions + * that use columns from LEFT/RIGHT equivalent sets to RIGHT/LEFT JOIN side. + * + * Additional filter push down optimizations: + * 1. TODO: Support building equivalent sets for more than 2 JOINS. It is possible, but will require more complex analysis step. + * 2. 
TODO: Support building equivalent sets for JOINs with more than 1 clause. + * 3. TODO: For LEFT/RIGHT JOIN, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and + * check if filter will always be false, in those scenario we can transform LEFT/RIGHT JOIN into INNER JOIN and push conditions to both tables. + * 4. TODO: It is possible to pull up filter conditions from LEFT/RIGHT stream and push conditions that use columns from LEFT/RIGHT equivalent sets + * to RIGHT/LEFT JOIN side. + */ + + const auto & join_header = child->getOutputStream().header; + const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); + const auto & left_stream_input_header = child->getInputStreams().front().header; + const auto & right_stream_input_header = child->getInputStreams().back().header; + + if (table_join.kind() == JoinKind::Full) + return 0; + + std::unordered_map equivalent_left_stream_column_to_right_stream_column; + std::unordered_map equivalent_right_stream_column_to_left_stream_column; + + bool has_single_clause = table_join.getClauses().size() == 1; + + if (has_single_clause) + { + const auto & join_clause = table_join.getClauses()[0]; + size_t key_names_size = join_clause.key_names_left.size(); + + for (size_t i = 0; i < key_names_size; ++i) + { + const auto & left_table_key_name = join_clause.key_names_left[i]; + const auto & right_table_key_name = join_clause.key_names_right[i]; + + if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name)) + continue; + + const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); + const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); + + if (!left_table_column.type->equals(*right_table_column.type)) + continue; + + equivalent_left_stream_column_to_right_stream_column[left_table_key_name] = right_table_column; + equivalent_right_stream_column_to_left_stream_column[right_table_key_name] = left_table_column; + } + } + + auto get_available_columns_for_filter = [&](bool push_to_left_stream, bool filter_push_down_input_columns_available) + { + Names available_input_columns_for_filter; + + if (!filter_push_down_input_columns_available) + return available_input_columns_for_filter; + + const auto & input_header = push_to_left_stream ? left_stream_input_header : right_stream_input_header; + const auto & input_columns_names = input_header.getNames(); + + for (const auto & name : input_columns_names) + { + if (!join_header.has(name)) + continue; + + /// Skip if type is changed. Push down expression expect equal types. + if (!input_header.getByName(name).type->equals(*join_header.getByName(name).type)) + continue; + + available_input_columns_for_filter.push_back(name); + } + + return available_input_columns_for_filter; + }; + + bool left_stream_filter_push_down_input_columns_available = true; + bool right_stream_filter_push_down_input_columns_available = true; + + if (table_join.kind() == JoinKind::Left) + right_stream_filter_push_down_input_columns_available = false; + else if (table_join.kind() == JoinKind::Right) + left_stream_filter_push_down_input_columns_available = false; + + /** We disable push down to right table in cases: + * 1. Right side is already filled. Example: JOIN with Dictionary. + * 2. ASOF Right join is not supported. 
+ */ + bool allow_push_down_to_right = join && join->allowPushDownToRight() && table_join.strictness() != JoinStrictness::Asof; + if (!allow_push_down_to_right) + right_stream_filter_push_down_input_columns_available = false; + + Names equivalent_columns_to_push_down; + + if (left_stream_filter_push_down_input_columns_available) + { + for (const auto & [name, _] : equivalent_left_stream_column_to_right_stream_column) + equivalent_columns_to_push_down.push_back(name); + } + + if (right_stream_filter_push_down_input_columns_available) + { + for (const auto & [name, _] : equivalent_right_stream_column_to_left_stream_column) + equivalent_columns_to_push_down.push_back(name); + } + + Names left_stream_available_columns_to_push_down = get_available_columns_for_filter(true /*push_to_left_stream*/, left_stream_filter_push_down_input_columns_available); + Names right_stream_available_columns_to_push_down = get_available_columns_for_filter(false /*push_to_left_stream*/, right_stream_filter_push_down_input_columns_available); + + auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), + filter->removesFilterColumn(), + left_stream_available_columns_to_push_down, + left_stream_input_header.getColumnsWithTypeAndName(), + right_stream_available_columns_to_push_down, + right_stream_input_header.getColumnsWithTypeAndName(), + equivalent_columns_to_push_down, + equivalent_left_stream_column_to_right_stream_column, + equivalent_right_stream_column_to_left_stream_column); + + size_t updated_steps = 0; + + if (join_filter_push_down_actions.left_stream_filter_to_push_down) + { + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.left_stream_filter_to_push_down, + join_filter_push_down_actions.left_stream_filter_removes_filter, + 0 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Left); + } + + if (join_filter_push_down_actions.right_stream_filter_to_push_down) + { + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.right_stream_filter_to_push_down, + join_filter_push_down_actions.right_stream_filter_removes_filter, + 1 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Right); + } + + if (updated_steps > 0) + { + const auto & filter_column_name = filter->getFilterColumnName(); + const auto & filter_expression = filter->getExpression(); + + const auto * filter_node = filter_expression->tryFindInOutputs(filter_column_name); + if (!filter_node && !filter->removesFilterColumn()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter_column_name, filter_expression->dumpDAG()); + + + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. 
+ parent = std::make_unique(child->getOutputStream(), filter_expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); + } + } + + return updated_steps; +} + size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -317,9 +520,6 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!keys.contains(column.name)) allowed_inputs.push_back(column.name); - // for (const auto & name : allowed_inputs) - // std::cerr << name << std::endl; - if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) return updated_steps; } @@ -327,77 +527,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) return updated_steps; - auto * join = typeid_cast(child.get()); - auto * filled_join = typeid_cast(child.get()); - - if (join || filled_join) - { - auto join_push_down = [&](JoinKind kind) -> size_t - { - const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); - - /// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys. - /// So, if we push down a condition like `key != 0`, not all rows may be filtered. - if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) - return 0; - - /// There is no ASOF Right join, so we're talking about pushing to the right side - if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) - return 0; - - bool is_left = kind == JoinKind::Left; - const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; - const auto & res_header = child->getOutputStream().header; - Names allowed_keys; - const auto & source_columns = input_header.getNames(); - for (const auto & name : source_columns) - { - /// Skip key if it is renamed. - /// I don't know if it is possible. Just in case. - if (!input_header.has(name) || !res_header.has(name)) - continue; - - /// Skip if type is changed. Push down expression expect equal types. - if (!input_header.getByName(name).type->equals(*res_header.getByName(name).type)) - continue; - - allowed_keys.push_back(name); - } - - /// For left JOIN, push down to the first child; for right - to the second one. - const auto child_idx = is_left ? 0 : 1; - ActionsDAGPtr split_filter = splitFilter(parent_node, allowed_keys, child_idx); - if (!split_filter) - return 0; - /* - * We should check the presence of a split filter column name in `source_columns` to avoid removing the required column. - * - * Example: - * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in `allowed_keys`, so the final split filter is just `a`. - * In this case `a` can be in `source_columns` but not `and(a, equals(b, c))`. - * - * New filter column is the first one. 
- */ - const String & split_filter_column_name = split_filter->getOutputs().front()->result_name; - bool can_remove_filter = source_columns.end() == std::find(source_columns.begin(), source_columns.end(), split_filter_column_name); - const size_t updated_steps = tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); - if (updated_steps > 0) - { - LOG_DEBUG(getLogger("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", split_filter_column_name, kind); - } - return updated_steps; - }; - - if (size_t updated_steps = join_push_down(JoinKind::Left)) - return updated_steps; - - /// For full sorting merge join we push down both to the left and right tables, because left and right streams are not independent. - if (join && join->allowPushDownToRight()) - { - if (size_t updated_steps = join_push_down(JoinKind::Right)) - return updated_steps; - } - } + if (auto updated_steps = tryPushDownOverJoinStep(parent_node, nodes, child)) + return updated_steps; /// TODO. /// We can filter earlier if expression does not depend on WITH FILL columns. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d4c3ab078e..927001dd0f6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7074,7 +7074,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData( return checkStructureAndGetMergeTreeData(*source_table, src_snapshot, my_snapshot); } -std::pair MergeTreeData::cloneAndLoadDataPart( +std::pair MergeTreeData::cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, @@ -7084,23 +7084,28 @@ std::pair MergeTreeData::cloneAn const WriteSettings & write_settings) { chassert(!isStaticStorage()); - bool on_same_disk = false; - for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) + + /// Check that the storage policy contains the disk where the src_part is located. + bool does_storage_policy_allow_same_disk = false; + for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { if (disk->getName() == src_part->getDataPartStorage().getDiskName()) { - on_same_disk = true; + does_storage_policy_allow_same_disk = true; break; } } - + if (!does_storage_policy_allow_same_disk) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); /// Why it is needed if we only hardlink files? - /// Answer: In issue #59377, add copy when attach from different disk. 
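The restored `cloneAndLoadDataPartOnSameDisk` now fails fast when the source part's disk is not part of the destination table's storage policy. A plain-C++ sketch of that guard, with a standard exception standing in for `ErrorCodes::BAD_ARGUMENTS`:

```cpp
// Hypothetical guard; std::invalid_argument stands in for ErrorCodes::BAD_ARGUMENTS.
#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

/// The destination table can only hardlink a part if its storage policy contains
/// the disk the source part lives on; otherwise cloning must fail early.
static void checkSourceDiskBelongsToPolicy(const std::vector<std::string> & policy_disks, const std::string & src_part_disk)
{
    if (std::find(policy_disks.begin(), policy_disks.end(), src_part_disk) == policy_disks.end())
        throw std::invalid_argument(
            "Could not clone and load part because disk '" + src_part_disk + "' does not belong to storage policy");
}

int main()
{
    checkSourceDiskBelongsToPolicy({"default", "s3"}, "default");   /// ok, same disk is available

    try
    {
        checkSourceDiskBelongsToPolicy({"default"}, "s3");          /// throws, as the BAD_ARGUMENTS path above does
    }
    catch (const std::invalid_argument &)
    {
    }
}
```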
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); auto src_part_storage = src_part->getDataPartStoragePtr(); @@ -7108,30 +7113,16 @@ std::pair MergeTreeData::cloneAn MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; String with_copy; - if (params.copy_instead_of_hardlink || !on_same_disk) + if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; - - std::shared_ptr dst_part_storage{}; - if (on_same_disk && !params.copy_instead_of_hardlink) - { - dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); - } - else - { - auto reservation_on_dst = getStoragePolicy()->reserve(src_part->getBytesOnDisk()); - if (!reservation_on_dst) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); - dst_part_storage = src_part_storage->clonePart( - this->getRelativeDataPath(), tmp_dst_part_name, reservation_on_dst->getDisk(), read_settings, write_settings, {}, {}); - } - + auto dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params); if (params.metadata_version_to_write.has_value()) { @@ -7153,7 +7144,7 @@ std::pair MergeTreeData::cloneAn .withPartFormatFromDisk() .build(); - if (on_same_disk && !params.copy_instead_of_hardlink && params.hardlinked_files) + if (!params.copy_instead_of_hardlink && params.hardlinked_files) { params.hardlinked_files->source_part_name = src_part->name; params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); @@ -7197,7 +7188,6 @@ std::pair MergeTreeData::cloneAn return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } - String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const { return disk->getPath() + relative_data_path; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d21f87c337e..b1fbadc57f0 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -839,7 +839,7 @@ public: MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; - std::pair cloneAndLoadDataPart( + std::pair cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a971c4fda1c..5e388d6a8ac 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2146,7 +2146,7 @@ bool MutateTask::prepare() scope_guard lock; { - std::tie(part, lock) = ctx->data->cloneAndLoadDataPart( + std::tie(part, lock) = ctx->data->cloneAndLoadDataPartOnSameDisk( ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getReadSettings(), ctx->context->getWriteSettings()); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c8523e540dd..da94916d514 100644 --- 
a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -8,10 +8,8 @@ #include #include #include -#include "Storages/MutationCommands.h" #include #include - #include #include @@ -221,6 +219,43 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() broken_parts_to_enqueue_fetches_on_loading.clear(); } +void ReplicatedMergeTreeQueue::addDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + drop_replace_range_intents.push_back(intent); +} + +void ReplicatedMergeTreeQueue::removeDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + auto it = std::find(drop_replace_range_intents.begin(), drop_replace_range_intents.end(), intent); + chassert(it != drop_replace_range_intents.end()); + drop_replace_range_intents.erase(it); +} + +bool ReplicatedMergeTreeQueue::isIntersectingWithDropReplaceIntent( + const LogEntry & entry, const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const +{ + const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + for (const auto & intent : drop_replace_range_intents) + { + if (!intent.isDisjoint(part_info)) + { + constexpr auto fmt_string = "Not executing {} of type {} for part {} (actual part {}) " + "because there is a drop or replace intent with part name {}."; + LOG_INFO( + LogToStr(out_reason, log), + fmt_string, + entry.znode_name, + entry.type, + entry.new_part_name, + part_name, + intent.getPartNameForLogs()); + return true; + } + } + return false; +} void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, @@ -1175,6 +1210,33 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); } +void ReplicatedMergeTreeQueue::waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const +{ + Queue to_wait; + + std::unique_lock lock(state_mutex); + + for (const auto& entry : queue) + { + if (!entry->currently_executing) + continue; + + const auto virtual_part_names = entry->getVirtualPartNames(format_version); + for (const auto & virtual_part_name : virtual_part_names) + { + if (!part_info.isDisjoint(MergeTreePartInfo::fromPartName(virtual_part_name, format_version))) + { + to_wait.push_back(entry); + break; + } + } + } + + LOG_DEBUG(log, "Waiting for {} entries that are currently executing.", to_wait.size()); + + for (LogEntryPtr & entry : to_wait) + entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); +} bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry, const String & new_part_name, String & out_reason, std::unique_lock & /* queue_lock */, @@ -1303,6 +1365,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( /// We can wait in worker threads, but not in scheduler.
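/// The drop/replace intent check added below also only postpones the entry: isIntersectingWithDropReplaceIntent fills
/// out_postpone_reason and the entry stays in the queue until the intent is removed.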
if (isCoveredByFuturePartsImpl(entry, new_part_name, out_postpone_reason, state_lock, /* covered_entries_to_wait */ nullptr)) return false; + + if (isIntersectingWithDropReplaceIntent(entry, new_part_name, out_postpone_reason, state_lock)) + return false; } if (entry.type != LogEntry::DROP_RANGE && entry.type != LogEntry::DROP_PART) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 85f3aacc766..df4176f5e3d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -107,6 +107,8 @@ private: */ ActiveDataPartSet virtual_parts; + /// Used to prevent operations from starting in ranges that will be affected by DROP_RANGE/REPLACE_RANGE + std::vector drop_replace_range_intents; /// We do not add DROP_PARTs to virtual_parts because they can intersect, /// so we store them separately in this structure. @@ -251,6 +253,10 @@ private: std::optional min_unprocessed_insert_time_changed, std::optional max_processed_insert_time_changed) const; + bool isIntersectingWithDropReplaceIntent( + const LogEntry & entry, + const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const; + /// Marks the element of the queue as running. class CurrentlyExecuting { @@ -349,6 +355,9 @@ public: const MergeTreePartInfo & part_info, const std::optional & covering_entry); + /// Wait until the currently executing actions whose virtual parts intersect with part_info have finished + void waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const; + /** In the case where there are not enough parts to perform the merge in part_name * - move actions with merged parts to the end of the queue * (in order to download a already merged part from another replica). @@ -490,6 +499,12 @@ public: void setBrokenPartsToEnqueueFetchesOnLoading(Strings && parts_to_fetch); /// Must be called right after queue loading. void createLogEntriesToFetchBrokenParts(); + + /// Add an intent to block operations from starting in the range. All intents must be removed by calling + /// removeDropReplaceIntent(). The same intent can be added multiple times, but it has to be removed exactly + /// the same number of times.
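/// (Sketch of the intended call sequence, mirroring the usage added in StorageReplicatedMergeTree::replacePartitionFrom
/// and movePartitionToTable: addDropReplaceIntent(range), guard its removal with a scope_guard, call
/// waitForCurrentlyExecutingOpsInRange(range), commit the DROP_RANGE/REPLACE_RANGE entry, then release the guard.)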
+ void addDropReplaceIntent(const MergeTreePartInfo& intent); + void removeDropReplaceIntent(const MergeTreePartInfo& intent); }; using CommittingBlocks = std::unordered_map>; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 86af02be899..0235a74400c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2096,7 +2096,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -2207,7 +2207,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const .copy_instead_of_hardlink = getSettings()->always_use_copy_instead_of_hardlinks, }; - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index aa90fc43d52..fcb946c089c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2788,7 +2788,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) auto obtain_part = [&] (PartDescriptionPtr & part_desc) { - /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPart(OnSameDisk) will do full copy. + /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPartOnSameDisk will do full copy. /// It's okay to check the setting for current table and disk for the source table, because src and dst part are on the same disk. 
bool prefer_fetch_from_other_replica = !part_desc->replica.empty() && storage_settings_ptr->allow_remote_fs_zero_copy_replication && part_desc->src_table_part && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(); @@ -2807,7 +2807,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPart( + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, @@ -4888,7 +4888,7 @@ bool StorageReplicatedMergeTree::fetchPart( .keep_metadata_version = true, }; - auto [cloned_part, lock] = cloneAndLoadDataPart( + auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk( part_to_clone, "tmp_clone_", part_info, @@ -8027,6 +8027,20 @@ void StorageReplicatedMergeTree::replacePartitionFrom( assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); + scope_guard intent_guard; + if (replace) + { + queue.addDropReplaceIntent(drop_range); + intent_guard = scope_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + queue.waitForCurrentlyExecutingOpsInRange(drop_range); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } + } + String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); std::set replaced_parts; @@ -8064,14 +8078,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; - IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -8079,10 +8091,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( clone_params, query_context->getReadSettings(), query_context->getWriteSettings()); - - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); src_parts.emplace_back(src_part); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8195,8 +8206,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom( lock2.reset(); lock1.reset(); - /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) + /// We need to pull the REPLACE_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); 
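// For reference, the complete intent lifecycle used here and in movePartitionToTable (a sketch with the names from
// this patch, not literal patch lines):
//
//     queue.addDropReplaceIntent(drop_range);
//     scope_guard intent_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }};
//     queue.waitForCurrentlyExecutingOpsInRange(drop_range);  // let already running intersecting entries finish
//     /* ...create and commit the REPLACE_RANGE entry... */
//     queue.pullLogsToQueue(...);                             // make sure our own queue contains the entry
//     intent_guard.reset();                                   // new entries in the range may be scheduled again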
+ // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of REPLACE_RANGE + intent_guard.reset(); parts_holder.clear(); cleanup_thread.wakeup(); @@ -8248,11 +8262,23 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta Coordination::Stat alter_partition_version_stat; zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - MergeTreePartInfo drop_range; std::optional delimiting_block_lock; + MergeTreePartInfo drop_range; getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + queue.addDropReplaceIntent(drop_range); + // Let's copy drop_range to make sure it doesn't get modified, otherwise we might run into issue on removal + scope_guard intent_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + + queue.waitForCurrentlyExecutingOpsInRange(drop_range); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } + DataPartPtr covering_part; DataPartsVector src_all_parts; { @@ -8320,7 +8346,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -8457,6 +8483,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of DROP_RANGE + intent_guard.reset(); parts_holder.clear(); cleanup_thread.wakeup(); diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8434355ce46..f11d62e9136 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -2121,11 +2121,13 @@ def main() -> int: pr_info, dump_to_file=True, ) - update_mergeable_check( - commit, - pr_info, - job_report.check_name or _get_ext_check_name(args.job_name), - ) + if not pr_info.is_merge_queue(): + # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) + update_mergeable_check( + commit, + pr_info, + job_report.check_name or _get_ext_check_name(args.job_name), + ) print(f"Job report url: [{check_url}]") prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index eebc846f4b1..617f4c9d88c 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -28,21 +28,22 @@ def main(): statuses = get_commit_filtered_statuses(commit) trigger_mergeable_check(commit, statuses) - statuses = [s for s in statuses if s.context == CI_STATUS_NAME] - if not statuses: - return - # Take the latest status - status = 
statuses[-1] - if status.state == PENDING: - post_commit_status( - commit, - SUCCESS, - status.target_url, - "All checks finished", - CI_STATUS_NAME, - pr_info, - dump_to_file=True, - ) + if not pr_info.is_merge_queue(): + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] + if not statuses: + return + # Take the latest status + status = statuses[-1] + if status.state == PENDING: + post_commit_status( + commit, + SUCCESS, + status.target_url, + "All checks finished", + CI_STATUS_NAME, + pr_info, + dump_to_file=True, + ) if __name__ == "__main__": diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 204284785c9..293004fc4f3 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -199,7 +199,6 @@ class PRInfo: EventType.MERGE_QUEUE in github_event ): # pull request and other similar events self.event_type = EventType.MERGE_QUEUE - # FIXME: need pr? we can parse it from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] self.number = 0 self.sha = github_event[EventType.MERGE_QUEUE]["head_sha"] self.base_ref = github_event[EventType.MERGE_QUEUE]["base_ref"] @@ -208,6 +207,8 @@ class PRInfo: self.base_name = github_event["repository"]["full_name"] # any_branch-name - the name of working branch name self.head_ref = github_event[EventType.MERGE_QUEUE]["head_ref"] + # parse underlying pr from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] + self.merged_pr = int(self.head_ref.split("/pr-")[-1].split("-")[0]) # UserName/ClickHouse or ClickHouse/ClickHouse self.head_name = self.base_name self.user_login = github_event["sender"]["login"] @@ -235,6 +236,8 @@ class PRInfo: if pull_request is None or pull_request["state"] == "closed": # it's merged PR to master self.number = 0 + if pull_request: + self.merged_pr = pull_request["number"] self.labels = set() self.pr_html_url = f"{repo_prefix}/commits/{ref}" self.base_ref = ref diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py new file mode 100644 index 00000000000..acff7ba541b --- /dev/null +++ b/tests/ci/sync_pr.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +"""Script for automatic sync PRs handling in private repos""" + +import argparse +import sys + +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from github_helper import GitHub +from commit_status_helper import get_commit, post_commit_status +from report import FAILURE, SUCCESS + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Script for handling sync PRs", + ) + parser.add_argument( + "--merge", + action="store_true", + help="merge sync pr", + ) + parser.add_argument( + "--status", + action="store_true", + help="check and set sync pr status", + ) + args = parser.parse_args() + return args + + +def merge_sync_pr(gh, sync_pr): + if not sync_pr: + print("Sync PR not found - exiting") + return + + if sync_pr.state == "closed": + print(f"Sync PR [{sync_pr.number}] already closed - exiting") + sys.exit(0) + + if sync_pr.state != "open": + print( + f"WARNING: Unknown Sync PR [{sync_pr.number}] state [{sync_pr.state}] - exiting" + ) + sys.exit(0) + + print(f"Trying to merge Sync PR [{sync_pr.number}]") + if sync_pr.draft: + gh.toggle_pr_draft(sync_pr) + sync_pr.merge() + + +def set_sync_status(gh, pr_info, sync_pr): + if not sync_pr or not sync_pr.mergeable: + post_commit_status( + get_commit(gh, pr_info.sha), FAILURE, "", 
"Sync PR failure", "A Sync" + ) + else: + post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync") + + +def main(): + args = parse_args() + + assert args.merge ^ args.status + + gh = GitHub(get_best_robot_token()) + + pr_info = PRInfo() + assert pr_info.merged_pr, "BUG. merged PR number could not been determined" + + prs = gh.get_pulls_from_search( + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + repo="ClickHouse/clickhouse-private", + ) + + sync_pr = None + + if len(prs) > 1: + print(f"WARNING: More than one PR found [{prs}] - exiting") + elif len(prs) == 0: + print("WARNING: No Sync PR found") + else: + sync_pr = prs[0] + + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) + + +if __name__ == "__main__": + main() diff --git a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml deleted file mode 100644 index b40730e9f7d..00000000000 --- a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - true - - replica1 - 9000 - - - replica2 - 9000 - - - - - diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py deleted file mode 100644 index df5378742ae..00000000000 --- a/tests/integration/test_attach_partition_using_copy/test.py +++ /dev/null @@ -1,187 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -cluster = ClickHouseCluster(__file__) - -replica1 = cluster.add_instance( - "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) -replica2 = cluster.add_instance( - "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - except Exception as ex: - print(ex) - finally: - cluster.shutdown() - - -def cleanup(nodes): - for node in nodes: - node.query("DROP TABLE IF EXISTS source SYNC") - node.query("DROP TABLE IF EXISTS destination SYNC") - - -def create_source_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - ATTACH TABLE {table_name} UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' - ( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') - """.format( - table_name=table_name, engine=engine - ) - ) - - -def create_destination_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - CREATE TABLE {table_name} - ( 
- price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - """.format( - table_name=table_name, engine=engine - ) - ) - - -def test_both_mergtree(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", False) - - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1]) - - -def test_all_replicated(start_cluster): - create_source_table(replica1, "source", True) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) - - -def test_only_destination_replicated(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - 
) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) diff --git a/tests/integration/test_distributed_config/test.py b/tests/integration/test_distributed_config/test.py index bf4bb5a4335..e551e69b93f 100644 --- a/tests/integration/test_distributed_config/test.py +++ b/tests/integration/test_distributed_config/test.py @@ -31,7 +31,7 @@ def test_distibuted_settings(start_cluster): DETACH TABLE dist_1; """ ) - assert "flush_on_detach = 1" in node.query("SHOW CREATE dist_1") + assert "flush_on_detach = true" in node.query("SHOW CREATE dist_1") # flush_on_detach=true, so data_1 should have 1 row assert int(node.query("SELECT count() FROM data_1")) == 1 diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 9584ace7f45..fdd81284b2a 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -5,7 +5,6 @@ import string import threading import time from multiprocessing.dummy import Pool -from helpers.test_tools import assert_eq_with_retry import pytest from helpers.client import QueryRuntimeException @@ -1746,9 +1745,9 @@ def test_move_while_merge(start_cluster): node1.query(f"DROP TABLE IF EXISTS {name} SYNC") -def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): +def test_move_across_policies_does_not_work(start_cluster): try: - name = "test_move_across_policies_work_for_attach_not_work_for_move" + name = "test_move_across_policies_does_not_work" node1.query( """ @@ -1784,18 +1783,25 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( - name=name - ) - ) - assert_eq_with_retry( - node1, - """SELECT * FROM {name}2""".format(name=name), + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): node1.query( - """SELECT * FROM {name}""".format(name=name), - ), - ) + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name + ) + ) + + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): + node1.query( + """ALTER TABLE {name}2 REPLACE PARTITION tuple() FROM {name}""".format( + name=name + ) + ) with pytest.raises( QueryRuntimeException, @@ -1807,6 +1813,10 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): ) ) + assert node1.query( + """SELECT * FROM {name}""".format(name=name) + ).splitlines() == ["1"] + finally: node1.query(f"DROP TABLE IF EXISTS {name} SYNC") node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC") diff --git a/tests/performance/join_filter_pushdown_equivalent_sets.xml b/tests/performance/join_filter_pushdown_equivalent_sets.xml new file mode 100644 index 00000000000..caddcb295c9 --- /dev/null +++ b/tests/performance/join_filter_pushdown_equivalent_sets.xml @@ -0,0 +1,16 @@ + + CREATE TABLE test_table_1(id UInt64, value String) ENGINE=MergeTree ORDER BY id + CREATE TABLE test_table_2(id UInt64, value String) ENGINE=MergeTree ORDER BY id + + INSERT INTO test_table_1 SELECT number, number FROM numbers(5000000) + INSERT INTO test_table_2 SELECT number, number FROM numbers(5000000) + + SELECT lhs.id, 
rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 AND rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null + + DROP TABLE test_table_1 + DROP TABLE test_table_2 + diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index ffd3e742cec..44f3ba9681a 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -20,7 +20,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index 6253840c63c..ac69c18ce39 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,21 +24,30 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file" expect "\n:) " -send -- "DROP TABLE IF EXISTS t01565;\r" +send -- "DROP TABLE IF EXISTS t01565;\n" +# NOTE: this is important for -mn mode, you should send "\r" only after reading echoed command +expect "\r\n" +send -- "\r" expect "\nOk." expect "\n:)" -send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\r" +send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n" +expect "\r\n" +send -- "\r" expect "\nOk." expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\r" +send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n" +expect "\r\n" +send -- "\r" expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\r" +send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n" +expect "\r\n" +send -- "\r" expect "\nOk." 
expect "\n:) " diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 436d06c5076..1b9755a74d5 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -180,12 +180,14 @@ Filter column: notEquals(__table1.number, 1_UInt8) > one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(number, 1) +Filter column: and(notEquals(number, 1), notEquals(number, 2)) Join +Filter column: and(notEquals(b, 2), notEquals(b, 1)) > (analyzer) one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(__table1.number, 1_UInt8) +Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8)) +Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8)) 3 3 > filter is pushed down before UNION Union diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 5a517264243..864dd69412a 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -248,14 +248,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(number, 1)" + grep -o "Join\|Filter column: and(notEquals(number, 1), notEquals(number, 2))\|Filter column: and(notEquals(b, 2), notEquals(b, 1))" echo "> (analyzer) one condition of filter is pushed down before INNER JOIN" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q " explain actions = 1 select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)" + grep -o "Join\|Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8))\|Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8))" $CLICKHOUSE_CLIENT -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index f04ffdae229..ebd6490077e 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -43,7 +43,7 @@ expect_after { -i \$any_spawn_id timeout { exit 1 } } -spawn bash -c "$* --highlight 0" +spawn bash -c "$*" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index 5498b5377ba..c653021aa5a 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -43,16 +43,16 @@ Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- 
QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; System SYSTEM FLUSH LOGS; -System SYSTEM STOP MERGES sqllt.table; -System SYSTEM START MERGES sqllt.table; -System SYSTEM STOP TTL MERGES sqllt.table; -System SYSTEM START TTL MERGES sqllt.table; -System SYSTEM STOP MOVES sqllt.table; -System SYSTEM START MOVES sqllt.table; -System SYSTEM STOP FETCHES sqllt.table; -System SYSTEM START FETCHES sqllt.table; -System SYSTEM STOP REPLICATED SENDS sqllt.table; -System SYSTEM START REPLICATED SENDS sqllt.table; +System SYSTEM STOP MERGES sqllt.table +System SYSTEM START MERGES sqllt.table +System SYSTEM STOP TTL MERGES sqllt.table +System SYSTEM START TTL MERGES sqllt.table +System SYSTEM STOP MOVES sqllt.table +System SYSTEM START MOVES sqllt.table +System SYSTEM STOP FETCHES sqllt.table +System SYSTEM START FETCHES sqllt.table +System SYSTEM STOP REPLICATED SENDS sqllt.table +System SYSTEM START REPLICATED SENDS sqllt.table Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; Show SHOW CREATE TABLE sqllt.table FORMAT Null; Show SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 30d725e6a2a..2d404b005c7 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -21,7 +21,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql index 48e84246d1c..be4b64888ca 100644 --- a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -127,3 +127,14 @@ SELECT arrayMap(x -> splitByChar(toString(id), arrayMap(x -> toString(1), [NULL] DROP TABLE test_table; -- { echoOff } + +SELECT + groupArray(number) AS counts, + arraySum(arrayMap(x -> (x + 1), counts)) as hello, + arrayMap(x -> (x / hello), counts) AS res +FROM numbers(1000000) FORMAT Null; + +SELECT + arrayWithConstant(pow(10,6), 1) AS nums, + arrayMap(x -> x, nums) AS m, + arrayMap(x -> x + arraySum(m), m) AS res FORMAT Null; diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference index 306374eed4b..82833f28369 100644 --- a/tests/queries/0_stateless/02494_query_cache_secrets.reference +++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference @@ -1,2 +1,2 @@ A2193552DCF8A9F99AC35F86BC4D2FFD -SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1 +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = true diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference 
b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference index df8198bc856..866d6cb7ec3 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -7,4 +7,4 @@ 1 1 1 1 1 -1 1 +1 2 diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql index a299e50984f..ea52df5d4b4 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS t1; CREATE TABLE t1 (key UInt8) ENGINE = Memory; diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh index d3252b29eb7..fe26784dab4 100755 --- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -5,33 +5,38 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Copy backups/with_broken_part.zip into the disk named "backups". -SRC_BACKUP_DIR=$CURDIR/backups -SRC_BACKUP_FILENAME=with_broken_part.zip +# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk, +# returns the path to the backup relative to that disk. +function install_test_backup() +{ + local test_backup_filename="$1" + local test_backup_path="$CURDIR/backups/${test_backup_filename}" -BACKUPS_DISK=backups -BACKUPS_DIR=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='$BACKUPS_DISK'") + local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") -if [ -z "$BACKUPS_DIR" ]; then - echo Disk \'$BACKUPS_DISK\' not found - exit 1 -fi + if [ -z "${backups_disk_root}" ]; then + echo Disk \'${backups_disk_root}\' not found + exit 1 + fi -BACKUP_FILENAME=$CLICKHOUSE_DATABASE/${SRC_BACKUP_FILENAME} -BACKUP_NAME="Disk('$BACKUPS_DISK', '$BACKUP_FILENAME')" + local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename} + mkdir -p "$(dirname "${install_path}")" + ln -s "${test_backup_path}" "${install_path}" -mkdir -p "$(dirname "$BACKUPS_DIR/$BACKUP_FILENAME")" -ln -s "$SRC_BACKUP_DIR/$SRC_BACKUP_FILENAME" "$BACKUPS_DIR/$BACKUP_FILENAME" + echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}" +} + +backup_name="$(install_test_backup with_broken_part.zip)" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # First try to restore with the setting `restore_broken_parts_as_detached` set to false. -$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}')" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # Then try to restore with the setting `restore_broken_parts_as_detached` set to true. 
-$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}') SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' $CLICKHOUSE_CLIENT --multiquery < t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 6a98a7bb57b..5458370db8c 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -14,6 +14,9 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; + SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; diff --git a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh index d49f1c41c69..f857358a5ea 100755 --- a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh +++ b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, no-replicated-database -# Tag no-ordinary-database: TO DO +# Tags: no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference new file mode 100644 index 00000000000..04ceb193415 --- /dev/null +++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference @@ -0,0 +1,4 @@ +RESTORED +2024-02-22 07:00:00 00 +2024-02-22 07:00:01 11 +2024-02-22 07:00:02 22 diff --git a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh new file mode 100755 index 00000000000..3a3d0edc38f --- /dev/null +++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk, +# returns the path to the backup relative to that disk. +function install_test_backup() +{ + local test_backup_filename="$1" + local test_backup_path="$CURDIR/backups/${test_backup_filename}" + + local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") + + if [ -z "${backups_disk_root}" ]; then + echo Disk \'${backups_disk_root}\' not found + exit 1 + fi + + local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename} + mkdir -p "$(dirname "${install_path}")" + ln -s "${test_backup_path}" "${install_path}" + + echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}" +} + +backup_name="$(install_test_backup old_backup_with_matview_inner_table_metadata.zip)" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src" + +db="$CLICKHOUSE_DATABASE" +${CLICKHOUSE_CLIENT} -q "RESTORE DATABASE mydb AS ${db} FROM Disk('backups', '${backup_name}') SETTINGS allow_different_database_def=true" | grep -o "RESTORED" + +${CLICKHOUSE_CLIENT} -q "SELECT toDateTime(timestamp, 'UTC') AS ts, c12 FROM mv ORDER BY ts" + +$CLICKHOUSE_CLIENT --query "DROP TABLE mv" +$CLICKHOUSE_CLIENT --query "DROP TABLE src" diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.reference b/tests/queries/0_stateless/03031_clickhouse_local_input.reference new file mode 100644 index 00000000000..a6feeef100d --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.reference @@ -0,0 +1,7 @@ +# foo +foo +# !foo +# bar +bar +# defaults +bam diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh new file mode 100755 index 00000000000..6f59e9b9703 --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" +echo '# foo' +$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << timeout )); then + echo "Timeout while waiting for operation ${operation_id} to come to status ${expected_status}. The current status is ${current_status}." + exit 1 + fi + sleep 0.1 + done +} + +# Making a backup. +backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" +backup_operation_id=$(start_async "BACKUP TABLE tbl TO ${backup_name} ASYNC") +wait_status ${backup_operation_id} "BACKUP_CREATED" + +# Restoring from that backup. +restore_operation_id=$(start_async "RESTORE TABLE tbl AS tbl2 FROM ${backup_name} ASYNC") +wait_status ${restore_operation_id} "RESTORED" + +# Check the result of that restoration. 
+${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl2" + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE tbl; +DROP TABLE tbl2; +" diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference new file mode 100644 index 00000000000..00740e6380f --- /dev/null +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -0,0 +1,710 @@ +-- { echoOn } + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + 
__table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table1.id UInt64 : 4 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 4, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 + FUNCTION equals(__table1.id : 4, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter 
column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table2.id UInt64 : 4 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 4, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + FUNCTION equals(__table2.id : 4, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value 
String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + 
Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id 
= rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 
+ __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 5 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 6 + FUNCTION equals(__table2.id : 3, 6_UInt8 :: 5) -> equals(__table2.id, 6_UInt8) UInt8 : 4 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 6, equals(__table2.id, 6_UInt8) :: 4) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + 
INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6;
diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql
new file mode 100644
index 00000000000..9627b55e633
--- /dev/null
+++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql
@@ -0,0 +1,131 @@
+SET allow_experimental_analyzer = 1;
+SET optimize_move_to_prewhere = 0;
+
+DROP TABLE IF EXISTS test_table_1;
+CREATE TABLE test_table_1
+(
+    id UInt64,
+    value String
+) ENGINE=MergeTree ORDER BY id;
+
+CREATE TABLE test_table_2
+(
+    id UInt64,
+    value String
+) ENGINE=MergeTree ORDER BY id;
+
+INSERT INTO test_table_1 SELECT number, number FROM numbers(10);
+INSERT INTO test_table_2 SELECT number, number FROM numbers(10);
+
+-- { echoOn }
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5 AND rhs.id = 6;
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5 AND rhs.id = 6;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE rhs.id = 5;
+
+SELECT '--';
+
+EXPLAIN header = 1, actions = 1
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5 AND rhs.id = 6;
+
+SELECT '--';
+
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id
+WHERE lhs.id = 5 AND rhs.id = 6;
+
+-- { echoOff }
+
+DROP TABLE test_table_1;
+DROP TABLE test_table_2;
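The reference output above is what this new test pins down: with the analyzer enabled, a WHERE condition on one join key is propagated to the equivalent key on the other side and pushed below the join — onto both ReadFromMergeTree branches for INNER joins and for conditions on the preserved side of LEFT/RIGHT joins, while for FULL joins (and for conditions on the non-preserved side of outer joins) the filter stays above the join. A minimal interactive sketch of the same check, using throwaway tables `t1`/`t2` rather than the test fixtures, might look like this:

``` sql
-- Illustrative sketch only; t1/t2 are throwaway names, not part of the patch.
SET allow_experimental_analyzer = 1;

DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (id UInt64, value String) ENGINE = MergeTree ORDER BY id;
CREATE TABLE t2 (id UInt64, value String) ENGINE = MergeTree ORDER BY id;
INSERT INTO t1 SELECT number, toString(number) FROM numbers(10);
INSERT INTO t2 SELECT number, toString(number) FROM numbers(10);

-- The plan should show a Filter over *both* ReadFromMergeTree steps,
-- because t1.id = 5 implies t2.id = 5 through the join-key equivalence.
EXPLAIN actions = 1
SELECT t1.id, t2.id
FROM t1
INNER JOIN t2 ON t1.id = t2.id
WHERE t1.id = 5;

DROP TABLE t1;
DROP TABLE t2;
```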
diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql
index 4985d3abfb6..7682e6ce866 100644
--- a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql
+++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql
@@ -1,5 +1,6 @@
 -- https://github.com/ClickHouse/ClickHouse/issues/29838
 SET allow_experimental_analyzer=1;
+SET distributed_foreground_insert=1;
 
 CREATE TABLE first_table_lr
 (
diff --git a/tests/integration/test_attach_partition_using_copy/__init__.py b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference
similarity index 100%
rename from tests/integration/test_attach_partition_using_copy/__init__.py
rename to tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference
diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql
new file mode 100644
index 00000000000..62fa3f437af
--- /dev/null
+++ b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql
@@ -0,0 +1,78 @@
+-- Bug 37909
+
+SELECT
+    v_date AS vDate,
+    round(sum(v_share)) AS v_sum
+FROM
+(
+    WITH
+        (
+            SELECT rand() % 10000
+        ) AS dummy_1,
+        (
+            SELECT rand() % 10000
+        ) AS dummy_2,
+        (
+            SELECT rand() % 10000
+        ) AS dummy_3,
+        _v AS
+        (
+            SELECT
+                xxHash64(rand()) % 100000 AS d_id,
+                toDate(parseDateTimeBestEffort('2022-01-01') + (rand() % 2600000)) AS v_date
+            FROM numbers(1000000)
+            ORDER BY d_id ASC
+        ),
+        _i AS
+        (
+            SELECT xxHash64(rand()) % 40000 AS d_id
+            FROM numbers(1000000)
+        ),
+        not_i AS
+        (
+            SELECT
+                NULL AS v_date,
+                d_id,
+                0 AS v_share
+            FROM _i
+            LIMIT 100
+        )
+    SELECT *
+    FROM
+    (
+        SELECT
+            d_id,
+            v_date,
+            v_share
+        FROM not_i
+        UNION ALL
+        SELECT
+            d_id,
+            v_date,
+            1 AS v_share
+        FROM
+        (
+            SELECT
+                d_id,
+                arrayJoin(groupArray(v_date)) AS v_date
+            FROM
+            (
+                SELECT
+                    v_date,
+                    d_id
+                FROM _v
+                UNION ALL
+                SELECT
+                    NULL AS v_date,
+                    d_id
+                FROM _i
+            )
+            GROUP BY d_id
+        )
+    )
+    WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07')
+)
+/* WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') placing condition has same effect */
+GROUP BY vDate
+ORDER BY vDate ASC
+SETTINGS allow_experimental_analyzer = 1; -- the query times out if allow_experimental_analyzer = 0
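The regression test above uses million-row inputs because that is how the original report was shaped; the comment on its final line records that the same query does not finish when allow_experimental_analyzer = 0. For readers who only want to see the query shape involved — a NULL-producing branch merged in via UNION ALL, re-expanded with arrayJoin(groupArray(...)), then filtered in the outer query — here is a much smaller hand-made reduction (mine, not part of the patch, and not guaranteed to reproduce the hang on older versions):

``` sql
-- Small stand-in for the pattern exercised by 03093_bug37909_query_does_not_finish.
SELECT d_id, v_date
FROM
(
    SELECT
        d_id,
        arrayJoin(groupArray(v_date)) AS v_date  -- re-expand the per-key date arrays
    FROM
    (
        SELECT number % 10 AS d_id, toDate('2022-05-10') AS v_date FROM numbers(100)
        UNION ALL
        SELECT number % 10 AS d_id, NULL AS v_date FROM numbers(10)  -- NULL branch, as in the test
    )
    GROUP BY d_id
)
WHERE v_date >= '2022-05-08'
SETTINGS allow_experimental_analyzer = 1;
```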
diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.reference b/tests/queries/0_stateless/03093_filter_push_down_crash.reference
new file mode 100644
index 00000000000..bf98540f4b3
--- /dev/null
+++ b/tests/queries/0_stateless/03093_filter_push_down_crash.reference
@@ -0,0 +1,5 @@
+1 \N 1
+1 \N 1
+1 \N 1
+1 \N 1
+1 \N 1
diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2
new file mode 100644
index 00000000000..2cbbd89ca0c
--- /dev/null
+++ b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2
@@ -0,0 +1,11 @@
+{% for join_algorithm in ['default', 'full_sorting_merge', 'hash', 'partial_merge', 'grace_hash'] -%}
+
+SET join_algorithm = '{{ join_algorithm }}';
+
+SELECT *
+FROM (SELECT 1 AS key) AS t1
+JOIN (SELECT NULL, 1 AS key) AS t2
+ON t1.key = t2.key
+WHERE t1.key ORDER BY key;
+
+{% endfor -%}
diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.reference b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference
new file mode 100644
index 00000000000..fb6ca6c5c3a
--- /dev/null
+++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference
@@ -0,0 +1 @@
+(1,'test') 1
diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.sql b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql
new file mode 100644
index 00000000000..e3c97f3fe41
--- /dev/null
+++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql
@@ -0,0 +1,4 @@
+SELECT
+    JSONExtract('{"a":1, "b":"test"}', 'Tuple(a UInt8, b String)') AS x,
+    x.a
+SETTINGS allow_experimental_analyzer = 1;
diff --git a/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip b/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip
new file mode 100644
index 00000000000..a2476da7ded
Binary files /dev/null and b/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip differ
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 30c2de2b507..9f7776f5201 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -183,6 +183,8 @@ CompiledExpressionCacheCount
 ComplexKeyCache
 ComplexKeyDirect
 ComplexKeyHashed
+Composable
+composable
 Config
 ConnectionDetails
 Const
@@ -697,6 +699,7 @@ PCRE
 PRCP
 PREWHERE
 PROCESSLIST
+PROXYv
 PSUN
 PagerDuty
 ParallelFormattingOutputFormatThreads
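A closing note on 03094_named_tuple_bug24607 above: its point is that a named Tuple returned by JSONExtract can be addressed by element name under the analyzer, which is presumably the scenario from issue 24607. A slightly expanded sketch (the `x.b` and `tupleElement` spellings are my assumed equivalents, not something the test itself checks):

``` sql
-- Named Tuple elements can be read with dot syntax; tupleElement(x, 'name') is
-- assumed to be an equivalent spelling in recent releases.
SELECT
    JSONExtract('{"a":1, "b":"test"}', 'Tuple(a UInt8, b String)') AS x,
    x.a,
    x.b,
    tupleElement(x, 'a') AS a_again
SETTINGS allow_experimental_analyzer = 1;
```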