Improve gathering metadata for backup - part 2.

This commit is contained in:
Vitaly Baranov 2022-06-23 12:17:54 +02:00
parent 64b51a3772
commit 461a31f237
14 changed files with 146 additions and 142 deletions

View File

@ -21,7 +21,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_COLLECT_OBJECTS_FOR_BACKUP;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
extern const int CANNOT_BACKUP_TABLE;
extern const int TABLE_IS_DROPPED;
extern const int LOGICAL_ERROR;
@ -162,37 +162,37 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
bool use_timeout = (timeout.count() >= 0);
auto start_time = std::chrono::steady_clock::now();
int pass = 1;
for (;;)
for (size_t pass = 1;; ++pass)
{
consistency = true;
/// Collect information about databases and tables specified in the BACKUP query.
gatherDatabasesMetadata();
gatherTablesMetadata();
/// We have to check consistency of collected information to protect from the case when some table or database is
/// renamed during this collecting making the collected information invalid.
checkConsistency();
if (consistency)
break;
/// Two passes is absolute minimum (see `previous_table_names` & `previous_database_names`).
auto elapsed = std::chrono::steady_clock::now() - start_time;
if ((pass >= 2) && use_timeout)
try
{
if (elapsed > timeout)
throw Exception(
ErrorCodes::CANNOT_COLLECT_OBJECTS_FOR_BACKUP,
"Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})",
pass,
to_string(elapsed));
}
/// Collect information about databases and tables specified in the BACKUP query.
database_infos.clear();
table_infos.clear();
gatherDatabasesMetadata();
gatherTablesMetadata();
if (pass >= 2)
LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
++pass;
/// We have to check consistency of collected information to protect from the case when some table or database is
/// renamed during this collecting making the collected information invalid.
auto comparing_error = compareWithPrevious();
if (!comparing_error)
break; /// no error, everything's fine
if (pass >= 2) /// Two passes is minimum (we need to compare with table names with previous ones to be sure we don't miss anything).
throw *comparing_error;
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP)
throw;
auto elapsed = std::chrono::steady_clock::now() - start_time;
e.addMessage("Couldn't gather tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
if (use_timeout && (elapsed > timeout))
throw;
else
LOG_WARNING(log, "{}", e.displayText());
}
}
LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size());
@ -200,8 +200,6 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
void BackupEntriesCollector::gatherDatabasesMetadata()
{
database_infos.clear();
/// Collect information about databases and tables specified in the BACKUP query.
for (const auto & element : backup_query_elements)
{
@ -264,16 +262,11 @@ void BackupEntriesCollector::gatherDatabasesMetadata()
/* partitions= */ {},
/* all_tables= */ true,
/* except_table_names= */ element.except_tables);
if (!consistency)
return;
}
}
break;
}
}
if (!consistency)
return;
}
}
@ -318,20 +311,14 @@ void BackupEntriesCollector::gatherDatabaseMetadata(
}
catch (...)
{
/// The database has been dropped recently.
consistency = false;
return;
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name);
}
database_info.create_database_query = create_database_query;
const auto & create = create_database_query->as<const ASTCreateQuery &>();
if (create.getDatabase() != database_name)
{
/// The database has been renamed recently.
consistency = false;
return;
}
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for database {}", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name));
}
if (table_name)
@ -358,9 +345,6 @@ void BackupEntriesCollector::gatherDatabaseMetadata(
void BackupEntriesCollector::gatherTablesMetadata()
{
if (!consistency)
return;
table_infos.clear();
for (const auto & [database_name, database_info] : database_infos)
{
@ -382,12 +366,8 @@ void BackupEntriesCollector::gatherTablesMetadata()
return false;
};
auto db_tables = database->getTablesForBackup(filter_by_table_name, context, consistency);
auto db_tables = database->getTablesForBackup(filter_by_table_name, context);
if (!consistency)
return;
/// Check that all tables were found.
std::unordered_set<String> found_table_names;
for (const auto & db_table : db_tables)
{
@ -395,13 +375,14 @@ void BackupEntriesCollector::gatherTablesMetadata()
const auto & create = create_table_query->as<const ASTCreateQuery &>();
found_table_names.emplace(create.getTable());
if ((is_temporary_database && !create.temporary) || (!is_temporary_database && (create.getDatabase() != database_name)))
{
consistency = false;
return;
}
if (is_temporary_database && !create.temporary)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false));
if (!is_temporary_database && (create.getDatabase() != database_name))
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false));
}
/// Check that all tables were found.
for (const auto & [table_name, table_info] : database_info.tables)
{
if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name))
@ -443,10 +424,7 @@ void BackupEntriesCollector::gatherTablesMetadata()
void BackupEntriesCollector::lockTablesForReading()
{
if (!consistency)
return;
for (auto & table_info : table_infos | boost::adaptors::map_values)
for (auto & [table_name, table_info] : table_infos)
{
auto storage = table_info.storage;
TableLockHolder table_lock;
@ -460,19 +438,15 @@ void BackupEntriesCollector::lockTablesForReading()
{
if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
throw;
consistency = false;
return;
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} is dropped", tableNameWithTypeToString(table_name.database, table_name.table, true));
}
}
}
}
/// Check for consistency of collected information about databases and tables.
void BackupEntriesCollector::checkConsistency()
/// Check consistency of collected information about databases and tables.
std::optional<Exception> BackupEntriesCollector::compareWithPrevious()
{
if (!consistency)
return; /// Already inconsistent, no more checks necessary
/// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed
/// while we were scanning.
std::set<String> database_names;
@ -480,12 +454,62 @@ void BackupEntriesCollector::checkConsistency()
boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end()));
boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end()));
if ((previous_database_names != database_names) || (previous_table_names != table_names))
if (previous_database_names != database_names)
{
std::optional<Exception> comparing_error;
for (const auto & database_name : database_names)
{
if (!previous_database_names.contains(database_name))
{
comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were added during scanning", backQuoteIfNeed(database_name)};
break;
}
}
if (!comparing_error)
{
for (const auto & database_name : previous_database_names)
{
if (!database_names.contains(database_name))
{
comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were removed during scanning", backQuoteIfNeed(database_name)};
break;
}
}
}
assert(comparing_error);
previous_database_names = std::move(database_names);
previous_table_names = std::move(table_names);
consistency = false;
return comparing_error;
}
if (previous_table_names != table_names)
{
std::optional<Exception> comparing_error;
for (const auto & table_name : table_names)
{
if (!previous_table_names.contains(table_name))
{
comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were added during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)};
break;
}
}
if (!comparing_error)
{
for (const auto & table_name : previous_table_names)
{
if (!table_names.contains(table_name))
{
comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were removed during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)};
break;
}
}
}
assert(comparing_error);
previous_table_names = std::move(table_names);
return comparing_error;
}
return {};
}
/// Make backup entries for all the definitions of all the databases found.

View File

@ -98,7 +98,7 @@ private:
void gatherTablesMetadata();
void lockTablesForReading();
void checkConsistency();
std::optional<Exception> compareWithPrevious();
void makeBackupEntriesForDatabasesDefs();
void makeBackupEntriesForTablesDefs();
@ -147,7 +147,6 @@ private:
std::map<QualifiedTableName, TableInfo> table_infos;
std::set<String> previous_database_names;
std::set<QualifiedTableName> previous_table_names;
bool consistency = false;
BackupEntries backup_entries;
std::queue<std::function<void()>> post_tasks;

View File

@ -691,8 +691,7 @@ void RestorerFromBackup::createTables()
if (!restore_settings.allow_different_table_def)
{
ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context);
bool consistency = true;
storage->adjustCreateQueryForBackup(create_table_query, consistency);
storage->adjustCreateQueryForBackup(create_table_query);
ASTPtr expected_create_query = table_info.create_table_query;
if (serializeAST(*create_table_query) != serializeAST(*expected_create_query))
{

View File

@ -631,7 +631,7 @@
M(660, HDFS_ERROR) \
M(661, CANNOT_SEND_SIGNAL) \
M(662, FS_METADATA_ERROR) \
M(663, CANNOT_COLLECT_OBJECTS_FOR_BACKUP) \
M(663, INCONSISTENT_METADATA_FOR_BACKUP) \
M(664, ACCESS_STORAGE_DOESNT_ALLOW_BACKUP) \
\
M(999, KEEPER_EXCEPTION) \

View File

@ -19,6 +19,7 @@ namespace ErrorCodes
{
extern const int UNKNOWN_TABLE;
extern const int LOGICAL_ERROR;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
}
DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_)
@ -145,11 +146,11 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl
DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies));
}
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const
{
/// We need a special processing for the temporary database.
if (getDatabaseName() != DatabaseCatalog::TEMPORARY_DATABASE)
return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context, consistency);
return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context);
std::vector<std::pair<ASTPtr, StoragePtr>> res;
@ -162,32 +163,22 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(co
if (!filter(table_name))
continue;
bool ok = false;
if (auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal))
{
/// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1"
/// it's not the real name of the table.
if (auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context))
{
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if (create.getTable() == table_name)
{
storage->adjustCreateQueryForBackup(create_table_query, consistency);
if (consistency)
{
res.emplace_back(create_table_query, storage);
ok = true;
}
}
}
}
auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal);
if (!storage_id)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't resolve the name of temporary table {}", backQuoteIfNeed(table_name));
if (!ok)
{
consistency = false;
return {};
}
/// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1"
/// it's not the real name of the table.
auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context);
if (!create_table_query)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for temporary table {}", backQuoteIfNeed(table_name));
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if (create.getTable() != table_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for temporary table {}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(table_name));
storage->adjustCreateQueryForBackup(create_table_query);
res.emplace_back(create_table_query, storage);
}
return res;

View File

@ -50,7 +50,7 @@ public:
void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override;
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
private:
const String data_path;

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
extern const int CANNOT_GET_CREATE_TABLE_QUERY;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
}
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
@ -322,34 +323,23 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name
backQuote(database_name), backQuote(table_name));
}
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const
{
std::vector<std::pair<ASTPtr, StoragePtr>> res;
for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next())
{
bool ok = false;
auto create_table_query = tryGetCreateTableQuery(it->name(), local_context);
if (!create_table_query)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for table {}.{}", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name()));
if (auto create_table_query = tryGetCreateTableQuery(it->name(), local_context))
{
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if (create.getTable() == it->name())
{
auto storage = it->table();
storage->adjustCreateQueryForBackup(create_table_query, consistency);
if (consistency)
{
res.emplace_back(create_table_query, storage);
ok = true;
}
}
}
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if (create.getTable() != it->name())
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for table {}.{}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name()));
if (!ok)
{
consistency = false;
return {};
}
auto storage = it->table();
storage->adjustCreateQueryForBackup(create_table_query);
res.emplace_back(create_table_query, storage);
}
return res;

View File

@ -36,7 +36,7 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override;
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;
void shutdown() override;

View File

@ -32,7 +32,7 @@ ASTPtr IDatabase::getCreateDatabaseQueryForBackup() const
return query;
}
std::vector<std::pair<ASTPtr, StoragePtr>> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &, bool &) const
std::vector<std::pair<ASTPtr, StoragePtr>> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
{
/// Cannot restore any table because IDatabase doesn't own any tables.
throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE,

View File

@ -336,7 +336,7 @@ public:
virtual ASTPtr getCreateDatabaseQueryForBackup() const;
/// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup.
virtual std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context, bool & consistency) const;
virtual std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const;
/// Creates a table restored from backup.
virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms);

View File

@ -24,6 +24,7 @@ namespace ErrorCodes
extern const int TABLE_IS_DROPPED;
extern const int NOT_IMPLEMENTED;
extern const int DEADLOCK_AVOIDED;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
}
bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
@ -248,7 +249,7 @@ bool IStorage::isStaticStorage() const
return false;
}
void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query, bool &) const
void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query) const
{
create_query = create_query->clone();
@ -260,6 +261,13 @@ void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query, bool &) const
/// If this is a definition of a system table we'll remove columns and comment because they're reduntant for backups.
if (isSystemStorage())
{
if (!create.storage || !create.storage->engine)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a system table {}", getStorageID().getFullTableName());
auto & engine = *(create.storage->engine);
if (!engine.name.starts_with("System"))
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a system table {}", engine.name, getStorageID().getFullTableName());
create.reset(create.columns_list);
create.reset(create.comment);
}

View File

@ -224,7 +224,7 @@ public:
bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const;
/// Modify a CREATE TABLE query to make a variant which must be written to a backup.
virtual void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const;
virtual void adjustCreateQueryForBackup(ASTPtr & create_query) const;
/// Makes backup entries to backup the data of this storage.
virtual void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions);

View File

@ -158,6 +158,7 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int CONCURRENT_ACCESS_NOT_SUPPORTED;
extern const int CHECKSUM_DOESNT_MATCH;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
}
namespace ActionLocks
@ -8253,9 +8254,9 @@ void StorageReplicatedMergeTree::createAndStoreFreezeMetadata(DiskPtr disk, Data
}
void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const
void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query) const
{
MergeTreeData::adjustCreateQueryForBackup(create_query, consistency);
MergeTreeData::adjustCreateQueryForBackup(create_query);
/// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there
/// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default.
@ -8263,19 +8264,11 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer
auto & create = create_query->as<ASTCreateQuery &>();
if (!create.storage || !create.storage->engine)
{
/// The CREATE query doesn't correspond to this storage.
consistency = false;
return;
}
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a replicated table {}", getStorageID().getFullTableName());
auto & engine = *(create.storage->engine);
if (!engine.name.starts_with("Replicated") || !engine.name.ends_with("MergeTree"))
{
/// The CREATE query doesn't correspond to this storage.
consistency = false;
return;
}
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a replicated table {}", engine.name, getStorageID().getFullTableName());
if (create.uuid == UUIDHelpers::Nil)
return;

View File

@ -233,7 +233,7 @@ public:
int getMetadataVersion() const { return metadata_version; }
/// Modify a CREATE TABLE query to make a variant which must be written to a backup.
void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const override;
void adjustCreateQueryForBackup(ASTPtr & create_query) const override;
/// Makes backup entries to backup the data of the storage.
void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;