#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace { /// Data for DDLDependencyVisitor. /// Used to visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. class DDLDependencyVisitorData { friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data); public: DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_) : create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_) { } /// Acquire the result of visiting the create query. TableNamesSet getDependencies() && { dependencies.erase(table_name); return std::move(dependencies); } bool needChildVisit(const ASTPtr & child) const { return !skip_asts.contains(child.get()); } void visit(const ASTPtr & ast) { if (auto * create = ast->as()) { visitCreateQuery(*create); } else if (auto * dictionary = ast->as()) { visitDictionaryDef(*dictionary); } else if (auto * expr = ast->as()) { visitTableExpression(*expr); } else if (const auto * function = ast->as()) { if (function->kind == ASTFunction::Kind::TABLE_ENGINE) visitTableEngine(*function); else visitFunction(*function); } } private: ASTPtr create_query; std::unordered_set skip_asts; QualifiedTableName table_name; String current_database; ContextPtr context; TableNamesSet dependencies; /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. void visitCreateQuery(const ASTCreateQuery & create) { QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; if (!to_table.table.empty()) { /// TO target_table (for materialized views) if (to_table.database.empty()) to_table.database = current_database; dependencies.emplace(to_table); } QualifiedTableName as_table{create.as_database, create.as_table}; if (!as_table.table.empty()) { /// AS table_name if (as_table.database.empty()) as_table.database = current_database; dependencies.emplace(as_table); } } /// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...) void visitDictionaryDef(const ASTDictionary & dictionary) { if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) return; auto config = getDictionaryConfigurationFromAST(create_query->as(), context); auto info = getInfoIfClickHouseDictionarySource(config, context); /// We consider only dependencies on local tables. if (!info || !info->is_local) return; if (!info->table_name.table.empty()) { if (info->table_name.database.empty()) info->table_name.database = current_database; dependencies.emplace(std::move(info->table_name)); } else { /// We don't have a table name, we have a select query instead tryVisitNestedSelect(info->query, *this); } } /// ASTTableExpression represents a reference to a table in SELECT query. /// DDLDependencyVisitor should handle ASTTableExpression because some CREATE queries can contain SELECT queries after AS /// (for example, CREATE VIEW). void visitTableExpression(const ASTTableExpression & expr) { if (!expr.database_and_table_name) return; const ASTIdentifier * identifier = dynamic_cast(expr.database_and_table_name.get()); if (!identifier) return; auto table_identifier = identifier->createTable(); if (!table_identifier) return; QualifiedTableName qualified_name{table_identifier->getDatabaseName(), table_identifier->shortName()}; if (qualified_name.table.empty()) return; if (qualified_name.database.empty()) { /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. qualified_name.database = current_database; } dependencies.emplace(qualified_name); } /// Finds dependencies of a table engine. void visitTableEngine(const ASTFunction & table_engine) { /// Dictionary(db_name.dictionary_name) if (table_engine.name == "Dictionary") addQualifiedNameFromArgument(table_engine, 0); /// Buffer('db_name', 'dest_table_name') if (table_engine.name == "Buffer") addDatabaseAndTableNameFromArguments(table_engine, 0, 1); /// Distributed(cluster_name, db_name, table_name, ...) if (table_engine.name == "Distributed") visitDistributedTableEngine(table_engine); } /// Distributed(cluster_name, database_name, table_name, ...) void visitDistributedTableEngine(const ASTFunction & table_engine) { /// We consider only dependencies on local tables. bool has_local_replicas = false; if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0)) { auto cluster = context->tryGetCluster(*cluster_name); if (cluster && cluster->getLocalShardCount()) has_local_replicas = true; } if (has_local_replicas) addDatabaseAndTableNameFromArguments(table_engine, 1, 2); } /// Finds dependencies of a function. void visitFunction(const ASTFunction & function) { if (functionIsJoinGet(function.name) || functionIsDictGet(function.name)) { /// dictGet('dict_name', attr_names, id_expr) /// dictHas('dict_name', id_expr) /// joinGet(join_storage_table_name, `value_column`, join_keys) addQualifiedNameFromArgument(function, 0); } else if (functionIsInOrGlobalInOperator(function.name)) { /// x IN table_name. /// We set evaluate=false here because we don't want to evaluate a subquery in "x IN subquery". addQualifiedNameFromArgument(function, 1, /* evaluate= */ false); } else if (function.name == "dictionary") { /// dictionary(dict_name) addQualifiedNameFromArgument(function, 0); } else if (function.name == "remote" || function.name == "remoteSecure") { visitRemoteFunction(function, /* is_cluster_function= */ false); } else if (function.name == "cluster" || function.name == "clusterAllReplicas") { visitRemoteFunction(function, /* is_cluster_function= */ true); } } /// remote('addresses_expr', db_name.table_name, ...) /// remote('addresses_expr', 'db_name', 'table_name', ...) /// remote('addresses_expr', table_function(), ...) /// cluster('cluster_name', db_name.table_name, ...) /// cluster('cluster_name', 'db_name', 'table_name', ...) /// cluster('cluster_name', table_function(), ...) void visitRemoteFunction(const ASTFunction & function, bool is_cluster_function) { /// We consider dependencies on local tables only. bool has_local_replicas = false; if (is_cluster_function) { if (auto cluster_name = tryGetClusterNameFromArgument(function, 0)) { if (auto cluster = context->tryGetCluster(*cluster_name)) { if (cluster->getLocalShardCount()) has_local_replicas = true; } } } else { /// remote() and remoteSecure() are not fully supported. To properly support them we would need to check the first /// argument to decide whether the host & port pattern specified in the first argument contains the local host or not /// which is not trivial. For now we just always assume that the host & port pattern doesn't contain the local host. } if (!function.arguments) return; ASTs & args = function.arguments->children; if (args.size() < 2) return; const ASTFunction * table_function = nullptr; if (const auto * second_arg_as_function = args[1]->as(); second_arg_as_function && KnownTableFunctionNames::instance().exists(second_arg_as_function->name)) { table_function = second_arg_as_function; } if (has_local_replicas && !table_function) { /// We set `apply_current_database=false` here because if this argument is an identifier without dot, /// then it's not the name of a table within the current database, it's the name of a database, and /// the name of a table will be in the following argument. auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* evaluate= */ true, /* apply_current_database= */ false); if (!maybe_qualified_name) return; auto & qualified_name = *maybe_qualified_name; if (qualified_name.database.empty()) { auto table = tryGetStringFromArgument(function, 2); if (!table) return; qualified_name.database = std::move(qualified_name.table); qualified_name.table = std::move(table).value(); } dependencies.insert(qualified_name); } if (!has_local_replicas && table_function) { /// `table function` will be executed remotely, so we won't check it or its arguments for dependencies. skip_asts.emplace(table_function); } } /// Gets an argument as a string, evaluates constants if necessary. std::optional tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true) const { if (!function.arguments) return {}; const ASTs & args = function.arguments->children; if (arg_idx >= args.size()) return {}; const auto & arg = args[arg_idx]; if (evaluate) { try { /// We're just searching for dependencies here, it's not safe to execute subqueries now. auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); const auto * literal = evaluated->as(); if (!literal || (literal->value.getType() != Field::Types::String)) return {}; return literal->value.safeGet(); } catch (...) { return {}; } } else { if (const auto * id = arg->as()) return id->name(); if (const auto * literal = arg->as()) { if (literal->value.getType() == Field::Types::String) return literal->value.safeGet(); } return {}; } } /// Gets an argument as a qualified table name. /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string). /// The function doesn't replace an empty database name with the current_database (the caller must do that). std::optional tryGetQualifiedNameFromArgument( const ASTFunction & function, size_t arg_idx, bool evaluate = true, bool apply_current_database = true) const { if (!function.arguments) return {}; const ASTs & args = function.arguments->children; if (arg_idx >= args.size()) return {}; const auto & arg = args[arg_idx]; QualifiedTableName qualified_name; if (const auto * identifier = dynamic_cast(arg.get())) { /// ASTIdentifier or ASTTableIdentifier auto table_identifier = identifier->createTable(); if (!table_identifier) return {}; qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } else { auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx, evaluate); if (!qualified_name_as_string) return {}; auto maybe_qualified_name = QualifiedTableName::tryParseFromString(*qualified_name_as_string); if (!maybe_qualified_name) return {}; qualified_name = std::move(maybe_qualified_name).value(); } if (qualified_name.database.empty() && apply_current_database) qualified_name.database = current_database; return qualified_name; } /// Adds a qualified table name from an argument to the collection of dependencies. /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string). void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true) { if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx, evaluate)) dependencies.emplace(std::move(qualified_name).value()); } /// Returns a database name and a table name extracted from two separate arguments. std::optional tryGetDatabaseAndTableNameFromArguments( const ASTFunction & function, size_t database_arg_idx, size_t table_arg_idx, bool apply_current_database = true) const { auto database = tryGetStringFromArgument(function, database_arg_idx); if (!database) return {}; auto table = tryGetStringFromArgument(function, table_arg_idx); if (!table || table->empty()) return {}; QualifiedTableName qualified_name; qualified_name.database = std::move(database).value(); qualified_name.table = std::move(table).value(); if (qualified_name.database.empty() && apply_current_database) qualified_name.database = current_database; return qualified_name; } /// Adds a database name and a table name from two separate arguments to the collection of dependencies. void addDatabaseAndTableNameFromArguments(const ASTFunction & function, size_t database_arg_idx, size_t table_arg_idx) { if (auto qualified_name = tryGetDatabaseAndTableNameFromArguments(function, database_arg_idx, table_arg_idx)) dependencies.emplace(std::move(qualified_name).value()); } std::optional tryGetClusterNameFromArgument(const ASTFunction & function, size_t arg_idx) const { if (!function.arguments) return {}; ASTs & args = function.arguments->children; if (arg_idx >= args.size()) return {}; auto cluster_name = ::DB::tryGetClusterName(*args[arg_idx]); if (cluster_name) return cluster_name; return tryGetStringFromArgument(function, arg_idx); } }; /// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. class DDLDependencyVisitor { public: using Data = DDLDependencyVisitorData; using Visitor = ConstInDepthNodeVisitor; static bool needChildVisit(const ASTPtr &, const ASTPtr & child, const Data & data) { return data.needChildVisit(child); } static void visit(const ASTPtr & ast, Data & data) { data.visit(ast); } }; void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data) { try { ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); ASTPtr select = parseQuery(parser, fixed_query, description, data.context->getSettingsRef().max_query_size, data.context->getSettingsRef().max_parser_depth); DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(select); } catch (...) { tryLogCurrentException("DDLDependencyVisitor"); } } } TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast) { DDLDependencyVisitor::Data data{context, table_name, ast}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); return std::move(data).getDependencies(); } }