From fe44be522ee11822ac8b86c1ffab8560b5fb37d9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Sep 2021 05:48:24 +0300 Subject: [PATCH 1/3] Fix assert in table function `merge` with database regexp --- src/Interpreters/IdentifierSemantic.cpp | 4 +-- src/Interpreters/IdentifierSemantic.h | 2 +- src/Interpreters/JoinedTables.cpp | 2 +- .../TranslateQualifiedNamesVisitor.cpp | 2 +- .../evaluateConstantExpression.cpp | 18 ---------- src/Interpreters/evaluateConstantExpression.h | 2 -- .../RocksDB/StorageEmbeddedRocksDB.cpp | 6 ++-- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 +- src/Storages/StorageMerge.cpp | 34 ++++++++++++++++--- src/Storages/StorageMerge.h | 9 +++-- src/TableFunctions/TableFunctionMerge.cpp | 15 +++----- src/TableFunctions/TableFunctionMerge.h | 6 ++-- .../2024_merge_regexp_assert.reference | 0 .../0_stateless/2024_merge_regexp_assert.sql | 1 + 14 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 tests/queries/0_stateless/2024_merge_regexp_assert.reference create mode 100644 tests/queries/0_stateless/2024_merge_regexp_assert.sql diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 098bf033399..1112b5c3bda 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -162,7 +162,7 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const { /// database.table.column if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table)) - return ColumnMatch::DbAndTable; + return ColumnMatch::DBAndTable; /// alias.column if (doesIdentifierBelongTo(identifier, db_and_table.alias)) @@ -199,7 +199,7 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const Da case ColumnMatch::TableAlias: to_strip = 1; break; - case ColumnMatch::DbAndTable: + case ColumnMatch::DBAndTable: to_strip = 2; break; default: diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index b36c1ad00dd..cabe755027c 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -31,7 +31,7 @@ struct IdentifierSemantic ColumnName, /// column qualified with column names list AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName) TableName, /// column qualified with table name - DbAndTable, /// column qualified with database and table name + DBAndTable, /// column qualified with database and table name TableAlias, /// column qualified with table alias Ambiguous, }; diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 271d7371425..3aae3982758 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -128,7 +128,7 @@ private: /// Table has an alias. We do not need to rewrite qualified names with table alias (match == ColumnMatch::TableName). auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); if (match == IdentifierSemantic::ColumnMatch::AliasedTableName || - match == IdentifierSemantic::ColumnMatch::DbAndTable) + match == IdentifierSemantic::ColumnMatch::DBAndTable) { if (rewritten) throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name() + "'", diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bf3bbf22b8c..2d1b6b3f239 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -334,7 +334,7 @@ void RestoreQualifiedNamesMatcher::Data::changeTable(ASTIdentifier & identifier) { case IdentifierSemantic::ColumnMatch::AliasedTableName: case IdentifierSemantic::ColumnMatch::TableName: - case IdentifierSemantic::ColumnMatch::DbAndTable: + case IdentifierSemantic::ColumnMatch::DBAndTable: IdentifierSemantic::setColumnLongName(identifier, remote_table); break; default: diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index c05118b7c6a..ae304906476 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -104,24 +104,6 @@ ASTPtr evaluateConstantExpressionForDatabaseName(const ASTPtr & node, ContextPtr return res; } -std::tuple evaluateDatabaseNameForMergeEngine(const ASTPtr & node, ContextPtr context) -{ - if (const auto * func = node->as(); func && func->name == "REGEXP") - { - if (func->arguments->children.size() != 1) - throw Exception("Arguments for REGEXP in Merge ENGINE should be 1", ErrorCodes::BAD_ARGUMENTS); - - auto * literal = func->arguments->children[0]->as(); - if (!literal || literal->value.safeGet().empty()) - throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); - - return std::tuple{true, func->arguments->children[0]}; - } - - auto ast = evaluateConstantExpressionForDatabaseName(node, context); - return std::tuple{false, ast}; -} - namespace { diff --git a/src/Interpreters/evaluateConstantExpression.h b/src/Interpreters/evaluateConstantExpression.h index 3b817080fe0..b95982f5b99 100644 --- a/src/Interpreters/evaluateConstantExpression.h +++ b/src/Interpreters/evaluateConstantExpression.h @@ -53,6 +53,4 @@ ASTPtr evaluateConstantExpressionForDatabaseName(const ASTPtr & node, ContextPtr */ std::optional evaluateExpressionOverConstantCondition(const ASTPtr & node, const ExpressionActionsPtr & target_expr, size_t & limit); -// Evaluate database name or regexp for StorageMerge and TableFunction merge -std::tuple evaluateDatabaseNameForMergeEngine(const ASTPtr & node, ContextPtr context); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 459c0879cda..549d939c3cc 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -280,7 +280,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, { fs::create_directories(rocksdb_dir); } - initDb(); + initDB(); } void StorageEmbeddedRocksDB::truncate(const ASTPtr &, const StorageMetadataPtr & , ContextPtr, TableExclusiveLockHolder &) @@ -288,10 +288,10 @@ void StorageEmbeddedRocksDB::truncate(const ASTPtr &, const StorageMetadataPtr & rocksdb_ptr->Close(); fs::remove_all(rocksdb_dir); fs::create_directories(rocksdb_dir); - initDb(); + initDB(); } -void StorageEmbeddedRocksDB::initDb() +void StorageEmbeddedRocksDB::initDB() { rocksdb::Status status; rocksdb::Options base; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 3f1b3b49492..358da9835ce 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -65,6 +65,6 @@ private: RocksDBPtr rocksdb_ptr; String rocksdb_dir; - void initDb(); + void initDB(); }; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c1066329e6f..df39662de05 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -49,7 +49,7 @@ StorageMerge::StorageMerge( const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, - const DbToTableSetMap & source_databases_and_tables_, + const DBToTableSetMap & source_databases_and_tables_, ContextPtr context_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -573,11 +573,14 @@ DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const String & datab { auto database = DatabaseCatalog::instance().getDatabase(database_name); - auto table_name_match = [this, &database_name](const String & table_name_) -> bool { + auto table_name_match = [this, database_name](const String & table_name_) -> bool + { if (source_databases_and_tables) { - const auto & source_tables = (*source_databases_and_tables).at(database_name); - return source_tables.count(table_name_); + if (auto it = source_databases_and_tables->find(database_name); it != source_databases_and_tables->end()) + return it->second.count(table_name_); + else + return false; } else return source_table_regexp->match(table_name_); @@ -742,6 +745,26 @@ IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const return first_materialized_mysql->getColumnSizes(); } + +std::tuple StorageMerge::evaluateDatabaseName(const ASTPtr & node, ContextPtr context_) +{ + if (const auto * func = node->as(); func && func->name == "REGEXP") + { + if (func->arguments->children.size() != 1) + throw Exception("REGEXP in Merge ENGINE takes only one argument", ErrorCodes::BAD_ARGUMENTS); + + auto * literal = func->arguments->children[0]->as(); + if (!literal || literal->value.safeGet().empty()) + throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); + + return {true, func->arguments->children[0]}; + } + + auto ast = evaluateConstantExpressionForDatabaseName(node, context_); + return {false, ast}; +} + + void registerStorageMerge(StorageFactory & factory) { factory.registerStorage("Merge", [](const StorageFactory::Arguments & args) @@ -757,10 +780,11 @@ void registerStorageMerge(StorageFactory & factory) " - name of source database and regexp for table names.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto [is_regexp, database_ast] = evaluateDatabaseNameForMergeEngine(engine_args[0], args.getLocalContext()); + auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(engine_args[0], args.getLocalContext()); if (!is_regexp) engine_args[0] = database_ast; + String source_database_name_or_regexp = database_ast->as().value.safeGet(); engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.getLocalContext()); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 20460e95156..b6001815f85 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -48,12 +48,15 @@ public: bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override; + /// Evaluate database name or regexp for StorageMerge and TableFunction merge + static std::tuple evaluateDatabaseName(const ASTPtr & node, ContextPtr context); + private: - using DbToTableSetMap = std::map>; + using DBToTableSetMap = std::map>; std::optional source_database_regexp; std::optional source_table_regexp; - std::optional source_databases_and_tables; + std::optional source_databases_and_tables; String source_database_name_or_regexp; bool database_is_regexp = false; @@ -86,7 +89,7 @@ protected: const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, - const DbToTableSetMap & source_databases_and_tables_, + const DBToTableSetMap & source_databases_and_tables_, ContextPtr context_); StorageMerge( diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 81dde4a12a4..f1ef4262d08 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -52,7 +52,7 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr " - name of source database and regexp for table names.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto [is_regexp, database_ast] = evaluateDatabaseNameForMergeEngine(args[0], context); + auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); database_is_regexp = is_regexp; @@ -65,7 +65,7 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr } -const TableFunctionMerge::DbToTableSetMap & TableFunctionMerge::getSourceDatabasesAndTables(ContextPtr context) const +const TableFunctionMerge::DBToTableSetMap & TableFunctionMerge::getSourceDatabasesAndTables(ContextPtr context) const { if (source_databases_and_tables) return *source_databases_and_tables; @@ -88,17 +88,10 @@ const TableFunctionMerge::DbToTableSetMap & TableFunctionMerge::getSourceDatabas auto databases = DatabaseCatalog::instance().getDatabases(); for (const auto & db : databases) - { if (database_re.match(db.first)) - { - auto source_tables = getMatchedTablesWithAccess(db.first, source_table_regexp, context); + (*source_databases_and_tables)[db.first] = getMatchedTablesWithAccess(db.first, source_table_regexp, context); - if (!source_tables.empty()) - (*source_databases_and_tables)[db.first] = source_tables; - } - } - - if ((*source_databases_and_tables).empty()) + if (source_databases_and_tables->empty()) throwNoTablesMatchRegexp(source_database_name_or_regexp, source_table_regexp); } diff --git a/src/TableFunctions/TableFunctionMerge.h b/src/TableFunctions/TableFunctionMerge.h index 73b61f8eb79..10221c8c72c 100644 --- a/src/TableFunctions/TableFunctionMerge.h +++ b/src/TableFunctions/TableFunctionMerge.h @@ -21,8 +21,8 @@ private: const char * getStorageTypeName() const override { return "Merge"; } using TableSet = std::set; - using DbToTableSetMap = std::map; - const DbToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; + using DBToTableSetMap = std::map; + const DBToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; ColumnsDescription getActualTableStructure(ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static TableSet getMatchedTablesWithAccess(const String & database_name, const String & table_regexp, const ContextPtr & context); @@ -30,7 +30,7 @@ private: String source_database_name_or_regexp; String source_table_regexp; bool database_is_regexp = false; - mutable std::optional source_databases_and_tables; + mutable std::optional source_databases_and_tables; }; diff --git a/tests/queries/0_stateless/2024_merge_regexp_assert.reference b/tests/queries/0_stateless/2024_merge_regexp_assert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/2024_merge_regexp_assert.sql b/tests/queries/0_stateless/2024_merge_regexp_assert.sql new file mode 100644 index 00000000000..3a3c2920f0b --- /dev/null +++ b/tests/queries/0_stateless/2024_merge_regexp_assert.sql @@ -0,0 +1 @@ +SELECT a FROM merge(REGEXP('.'), 'query_log'); -- { serverError 47 } From 9dac348893dc32fb9be02122f5474a570788662a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 25 Sep 2021 19:41:50 +0300 Subject: [PATCH 2/3] Update StorageMerge.cpp --- src/Storages/StorageMerge.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index df39662de05..aaa375d645e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -35,6 +35,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_PREWHERE; From 8493b014cd7ea7472c396b37f6a93df7191c402f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Sep 2021 02:16:58 +0300 Subject: [PATCH 3/3] Improve test --- tests/queries/0_stateless/2024_merge_regexp_assert.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/2024_merge_regexp_assert.sql b/tests/queries/0_stateless/2024_merge_regexp_assert.sql index 3a3c2920f0b..8ea4a77bbd8 100644 --- a/tests/queries/0_stateless/2024_merge_regexp_assert.sql +++ b/tests/queries/0_stateless/2024_merge_regexp_assert.sql @@ -1 +1,7 @@ -SELECT a FROM merge(REGEXP('.'), 'query_log'); -- { serverError 47 } +DROP TABLE IF EXISTS t; +CREATE TABLE t (b UInt8) ENGINE = Memory; +SELECT a FROM merge(REGEXP('.'), '^t$'); -- { serverError 47 } +SELECT a FROM merge(REGEXP('\0'), '^t$'); -- { serverError 47 } +SELECT a FROM merge(REGEXP('\0a'), '^t$'); -- { serverError 47 } +SELECT a FROM merge(REGEXP('\0a'), '^$'); -- { serverError 36 } +DROP TABLE t;