#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace fs = std::filesystem; namespace DB { namespace ErrorCodes { extern const int BACKUP_ENTRY_NOT_FOUND; extern const int CANNOT_RESTORE_TABLE; extern const int CANNOT_RESTORE_DATABASE; extern const int LOGICAL_ERROR; } namespace { /// Finding databases and tables in the backup which we're going to restore. constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup"; /// Creating databases or finding them and checking their definitions. constexpr const char * kCreatingDatabasesStatus = "creating databases"; /// Creating tables or finding them and checking their definition. constexpr const char * kCreatingTablesStatus = "creating tables"; /// Inserting restored data to tables. constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables"; /// Error status. constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus; /// Uppercases the first character of a passed string. String toUpperFirst(const String & str) { String res = str; res[0] = std::toupper(res[0]); return res; } /// Outputs "table " or "temporary table " String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper) { String str; if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) str = fmt::format("temporary table {}", backQuoteIfNeed(table_name)); else str = fmt::format("table {}.{}", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); if (first_upper) str[0] = std::toupper(str[0]); return str; } /// Whether a specified name corresponds one of the tables backuping ACL. bool isSystemAccessTableName(const QualifiedTableName & table_name) { if (table_name.database != DatabaseCatalog::SYSTEM_DATABASE) return false; return (table_name.table == "users") || (table_name.table == "roles") || (table_name.table == "settings_profiles") || (table_name.table == "row_policies") || (table_name.table == "quotas"); } } RestorerFromBackup::RestorerFromBackup( const ASTBackupQuery::Elements & restore_query_elements_, const RestoreSettings & restore_settings_, std::shared_ptr restore_coordination_, const BackupPtr & backup_, const ContextMutablePtr & context_) : restore_query_elements(restore_query_elements_) , restore_settings(restore_settings_) , restore_coordination(restore_coordination_) , backup(backup_) , context(context_) , create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) { } RestorerFromBackup::~RestorerFromBackup() = default; RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode) { try { /// run() can be called onle once. if (!current_status.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring"); /// Find other hosts working along with us to execute this ON CLUSTER query. all_hosts = BackupSettings::Util::filterHostIDs( restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); /// Do renaming in the create queries according to the renaming config. renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". findRootPathsInBackup(); /// Find all the databases and tables which we will read from the backup. setStatus(kFindingTablesInBackupStatus); findDatabasesAndTablesInBackup(); /// Check access rights. checkAccessForObjectsFoundInBackup(); if (mode == Mode::CHECK_ACCESS_ONLY) return {}; /// Create databases using the create queries read from the backup. setStatus(kCreatingDatabasesStatus); createDatabases(); /// Create tables using the create queries read from the backup. setStatus(kCreatingTablesStatus); createTables(); /// All what's left is to insert data to tables. /// No more data restoring tasks are allowed after this point. setStatus(kInsertingDataToTablesStatus); return getDataRestoreTasks(); } catch (...) { try { /// Other hosts should know that we've encountered an error. setStatus(kErrorStatus, getCurrentExceptionMessage(false)); } catch (...) { } throw; } } void RestorerFromBackup::setStatus(const String & new_status, const String & message) { if (new_status == kErrorStatus) { LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), message); if (restore_coordination) restore_coordination->setStatus(restore_settings.host_id, new_status, message); } else { LOG_TRACE(log, "{}", toUpperFirst(new_status)); current_status = new_status; if (restore_coordination) restore_coordination->setStatusAndWait(restore_settings.host_id, new_status, message, all_hosts); } } void RestorerFromBackup::findRootPathsInBackup() { size_t shard_num = 1; size_t replica_num = 1; if (!restore_settings.host_id.empty()) { std::tie(shard_num, replica_num) = BackupSettings::Util::findShardNumAndReplicaNum(restore_settings.cluster_host_ids, restore_settings.host_id); } root_paths_in_backup.clear(); /// Start with "" as the root path and then we will add shard- and replica-related part to it. fs::path root_path = "/"; root_paths_in_backup.push_back(root_path); /// Add shard-related part to the root path. Strings shards_in_backup = backup->listFiles(root_path / "shards"); if (shards_in_backup.empty()) { if (restore_settings.shard_num_in_backup > 1) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No shard #{} in backup", restore_settings.shard_num_in_backup); } else { String shard_name; if (restore_settings.shard_num_in_backup) shard_name = std::to_string(restore_settings.shard_num_in_backup); else if (shards_in_backup.size() == 1) shard_name = shards_in_backup.front(); else shard_name = std::to_string(shard_num); if (std::find(shards_in_backup.begin(), shards_in_backup.end(), shard_name) == shards_in_backup.end()) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No shard #{} in backup", shard_name); root_path = root_path / "shards" / shard_name; root_paths_in_backup.push_back(root_path); } /// Add replica-related part to the root path. Strings replicas_in_backup = backup->listFiles(root_path / "replicas"); if (replicas_in_backup.empty()) { if (restore_settings.replica_num_in_backup > 1) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No replica #{} in backup", restore_settings.replica_num_in_backup); } else { String replica_name; if (restore_settings.replica_num_in_backup) { replica_name = std::to_string(restore_settings.replica_num_in_backup); if (std::find(replicas_in_backup.begin(), replicas_in_backup.end(), replica_name) == replicas_in_backup.end()) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No replica #{} in backup", replica_name); } else { replica_name = std::to_string(replica_num); if (std::find(replicas_in_backup.begin(), replicas_in_backup.end(), replica_name) == replicas_in_backup.end()) replica_name = replicas_in_backup.front(); } root_path = root_path / "replicas" / replica_name; root_paths_in_backup.push_back(root_path); } /// Revert the list of root paths, because we need it in the following order: /// "/shards//replicas//" (first we search tables here) /// "/shards//" (then here) /// "/" (and finally here) std::reverse(root_paths_in_backup.begin(), root_paths_in_backup.end()); LOG_TRACE( log, "Will use paths in backup: {}", boost::algorithm::join( root_paths_in_backup | boost::adaptors::transformed([](const fs::path & path) -> String { return doubleQuoteString(String{path}); }), ", ")); } void RestorerFromBackup::findDatabasesAndTablesInBackup() { database_infos.clear(); table_infos.clear(); for (const auto & element : restore_query_elements) { switch (element.type) { case ASTBackupQuery::ElementType::TABLE: { findTableInBackup({element.database_name, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: { findDatabaseInBackup(element.database_name, element.except_tables); break; } case ASTBackupQuery::ElementType::ALL: { findEverythingInBackup(element.except_databases, element.except_tables); break; } } } LOG_INFO(log, "Will restore {} databases and {} tables", database_infos.size(), table_infos.size()); } void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) { bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); std::optional metadata_path; std::optional root_path_in_use; for (const auto & root_path_in_backup : root_paths_in_backup) { fs::path try_metadata_path; if (is_temporary_table) { try_metadata_path = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(table_name_in_backup.table) + ".sql"); } else { try_metadata_path = root_path_in_backup / "metadata" / escapeForFileName(table_name_in_backup.database) / (escapeForFileName(table_name_in_backup.table) + ".sql"); } if (backup->fileExists(try_metadata_path)) { metadata_path = try_metadata_path; root_path_in_use = root_path_in_backup; break; } } if (!metadata_path) throw Exception( ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "{} not found in backup", tableNameWithTypeToString(table_name_in_backup.database, table_name_in_backup.table, true)); fs::path data_path_in_backup; if (is_temporary_table) { data_path_in_backup = *root_path_in_use / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table); } else { data_path_in_backup = *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); } auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer(); String create_query_str; readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); if (auto it = table_infos.find(table_name); it != table_infos.end()) { const TableInfo & table_info = it->second; if (table_info.create_table_query && (serializeAST(*table_info.create_table_query) != serializeAST(*create_table_query))) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, "Extracted two different create queries for the same {}: {} and {}", tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*table_info.create_table_query), serializeAST(*create_table_query)); } } TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; res_table_info.is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); res_table_info.data_path_in_backup = data_path_in_backup; res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); if (partitions) { if (!res_table_info.partitions) res_table_info.partitions.emplace(); insertAtEnd(*res_table_info.partitions, *partitions); } if (isSystemAccessTableName(table_name)) { if (!access_restore_task) access_restore_task = std::make_shared(backup, restore_settings, restore_coordination); access_restore_task->addDataPath(data_path_in_backup); } } void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names) { std::optional metadata_path; std::unordered_set table_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { fs::path try_metadata_path, try_tables_metadata_path; if (database_name_in_backup == DatabaseCatalog::TEMPORARY_DATABASE) { try_tables_metadata_path = root_path_in_backup / "temporary_tables" / "metadata"; } else { try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql"); try_tables_metadata_path = root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup); } if (!metadata_path && !try_metadata_path.empty() && backup->fileExists(try_metadata_path)) metadata_path = try_metadata_path; Strings file_names = backup->listFiles(try_tables_metadata_path); for (const String & file_name : file_names) { if (!file_name.ends_with(".sql")) continue; String file_name_without_ext = file_name.substr(0, file_name.length() - strlen(".sql")); table_names_in_backup.insert(unescapeForFileName(file_name_without_ext)); } } if (!metadata_path && table_names_in_backup.empty()) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Database {} not found in backup", backQuoteIfNeed(database_name_in_backup)); if (metadata_path) { auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer(); String create_query_str; readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext()); String database_name = renaming_map.getNewDatabaseName(database_name_in_backup); DatabaseInfo & database_info = database_infos[database_name]; if (database_info.create_database_query && (serializeAST(*database_info.create_database_query) != serializeAST(*create_database_query))) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, "Extracted two different create queries for the same database {}: {} and {}", backQuoteIfNeed(database_name), serializeAST(*database_info.create_database_query), serializeAST(*create_database_query)); } database_info.create_database_query = create_database_query; database_info.is_predefined_database = DatabaseCatalog::isPredefinedDatabase(database_name); } for (const String & table_name_in_backup : table_names_in_backup) { if (except_table_names.contains({database_name_in_backup, table_name_in_backup})) continue; findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); } } void RestorerFromBackup::findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names) { std::unordered_set database_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { Strings file_names = backup->listFiles(root_path_in_backup / "metadata"); for (String & file_name : file_names) { if (file_name.ends_with(".sql")) file_name.resize(file_name.length() - strlen(".sql")); database_names_in_backup.emplace(unescapeForFileName(file_name)); } if (backup->hasFiles(root_path_in_backup / "temporary_tables" / "metadata")) database_names_in_backup.emplace(DatabaseCatalog::TEMPORARY_DATABASE); } for (const String & database_name_in_backup : database_names_in_backup) { if (except_database_names.contains(database_name_in_backup)) continue; findDatabaseInBackup(database_name_in_backup, except_table_names); } } void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const { AccessRightsElements required_access; for (const auto & [database_name, database_info] : database_infos) { if (database_info.is_predefined_database) continue; AccessFlags flags; if (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist) flags |= AccessType::CREATE_DATABASE; if (!flags) flags = AccessType::SHOW_DATABASES; required_access.emplace_back(flags, database_name); } for (const auto & [table_name, table_info] : table_infos) { /// Access required to restore ACL system tables is checked separately. if (table_info.is_predefined_table) continue; if (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) { if (restore_settings.create_table != RestoreTableCreationMode::kMustExist) required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE); continue; } AccessFlags flags; const ASTCreateQuery & create = table_info.create_table_query->as(); if (restore_settings.create_table != RestoreTableCreationMode::kMustExist) { if (create.is_dictionary) flags |= AccessType::CREATE_DICTIONARY; else if (create.is_ordinary_view || create.is_materialized_view || create.is_live_view) flags |= AccessType::CREATE_VIEW; else flags |= AccessType::CREATE_TABLE; } if (!restore_settings.structure_only && !create.is_dictionary && !create.is_ordinary_view && backup->hasFiles(table_info.data_path_in_backup)) { flags |= AccessType::INSERT; } if (!flags) { if (create.is_dictionary) flags = AccessType::SHOW_DICTIONARIES; else flags = AccessType::SHOW_TABLES; } required_access.emplace_back(flags, table_name.database, table_name.table); } if (access_restore_task) insertAtEnd(required_access, access_restore_task->getRequiredAccess()); /// We convert to AccessRights and back to check access rights in a predictable way /// (some elements could be duplicated or not sorted). required_access = AccessRights{required_access}.getElements(); context->checkAccess(required_access); } void RestorerFromBackup::createDatabases() { for (const auto & [database_name, database_info] : database_infos) { bool need_create_database = (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist); if (database_info.is_predefined_database) need_create_database = false; /// Predefined databases always exist. if (need_create_database) { /// Execute CREATE DATABASE query. auto create_database_query = database_info.create_database_query; if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) { create_database_query = create_database_query->clone(); create_database_query->as().if_not_exists = true; } LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query)); InterpreterCreateQuery interpreter{create_database_query, context}; interpreter.setInternal(true); interpreter.execute(); } DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name); if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database) { /// Check that the database's definition is the same as expected. ASTPtr create_database_query = database->getCreateDatabaseQuery(); adjustCreateQueryForBackup(create_database_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = database_info.create_database_query; if (serializeAST(*create_database_query) != serializeAST(*expected_create_query)) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, "The database {} has a different definition: {} " "comparing to its definition in the backup: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query), serializeAST(*expected_create_query)); } } } } void RestorerFromBackup::createTables() { while (true) { /// We need to create tables considering their dependencies. auto tables_to_create = findTablesWithoutDependencies(); if (tables_to_create.empty()) break; /// We've already created all the tables. for (const auto & table_name : tables_to_create) { auto & table_info = table_infos.at(table_name); DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database); bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist); if (table_info.is_predefined_table) need_create_table = false; /// Predefined tables always exist. if (need_create_table) { auto create_table_query = table_info.create_table_query; if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) { create_table_query = create_table_query->clone(); create_table_query->as().if_not_exists = true; } LOG_TRACE( log, "Creating {}: {}", tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query)); /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some /// database-specific things). database->createTableRestoredFromBackup( create_table_query, context, restore_coordination, std::chrono::duration_cast(create_table_timeout).count()); } table_info.created = true; auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) ? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); auto storage = database->getTable(resolved_id.table_name, context); table_info.storage = storage; table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table) { ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = table_info.create_table_query; if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, "{} has a different definition: {} " "comparing to its definition in the backup: {}", tableNameWithTypeToString(table_name.database, table_name.table, true), serializeAST(*create_table_query), serializeAST(*expected_create_query)); } } if (!restore_settings.structure_only) { const auto & data_path_in_backup = table_info.data_path_in_backup; const auto & partitions = table_info.partitions; storage->restoreDataFromBackup(*this, data_path_in_backup, partitions); } } } } /// Returns the list of tables without dependencies or those which dependencies have been created before. std::vector RestorerFromBackup::findTablesWithoutDependencies() const { std::vector tables_without_dependencies; bool all_tables_created = true; for (const auto & [key, table_info] : table_infos) { if (table_info.created) continue; /// Found a table which is not created yet. all_tables_created = false; /// Check if all dependencies have been created before. bool all_dependencies_met = true; for (const auto & dependency : table_info.dependencies) { auto it = table_infos.find(dependency); if ((it != table_infos.end()) && !it->second.created) { all_dependencies_met = false; break; } } if (all_dependencies_met) tables_without_dependencies.push_back(key); } if (!tables_without_dependencies.empty()) return tables_without_dependencies; if (all_tables_created) return {}; /// Cyclic dependency? We'll try to create those tables anyway but probably it's going to fail. std::vector tables_with_cyclic_dependencies; for (const auto & [key, table_info] : table_infos) { if (!table_info.created) tables_with_cyclic_dependencies.push_back(key); } /// Only show a warning here, proper exception will be thrown later on creating those tables. LOG_WARNING( log, "Some tables have cyclic dependency from each other: {}", boost::algorithm::join( tables_with_cyclic_dependencies | boost::adaptors::transformed([](const QualifiedTableName & table_name) -> String { return table_name.getFullName(); }), ", ")); return tables_with_cyclic_dependencies; } void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task) { if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); data_restore_tasks.push_back(std::move(new_task)); } void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks) { if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); insertAtEnd(data_restore_tasks, std::move(new_tasks)); } void RestorerFromBackup::checkPathInBackupIsRegisteredToRestoreAccess(const String & path) { if (!access_restore_task || !access_restore_task->hasDataPath(path)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added"); } RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() { if (data_restore_tasks.empty() && !access_restore_task) return {}; LOG_TRACE(log, "Will insert data to tables"); /// Storages and table locks must exist while we're executing data restoring tasks. auto storages = std::make_shared>(); auto table_locks = std::make_shared>(); storages->reserve(table_infos.size()); table_locks->reserve(table_infos.size()); for (const auto & table_info : table_infos | boost::adaptors::map_values) { storages->push_back(table_info.storage); table_locks->push_back(table_info.table_lock); } DataRestoreTasks res_tasks; for (const auto & task : data_restore_tasks) res_tasks.push_back([task, storages, table_locks] { task(); }); if (access_restore_task) res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); }); return res_tasks; } void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, "Table engine {} doesn't support partitions, cannot table {}", table_engine, storage_id.getFullTableName()); } void RestorerFromBackup::throwTableIsNotEmpty(const StorageID & storage_id) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore the table {} because it already contains some data. You can set structure_only=true or " "allow_non_empty_tables=true to overcome that in the way you want", storage_id.getFullTableName()); } }