Merge pull request #62295 from vitlibar/fix-backup-and-restore-of-matview-in-ordinary-db

Fix BACKUP and RESTORE of a materialized view in Ordinary database
This commit is contained in:
Vitaly Baranov 2024-04-11 08:40:11 +00:00 committed by GitHub
commit 190ff5e7d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 111 additions and 41 deletions

View File

@ -125,7 +125,7 @@ BackupEntries BackupEntriesCollector::run()
= BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements);
renaming_map = BackupUtils::makeRenamingMap(backup_query_elements);
/// Calculate the root path for collecting backup entries, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
calculateRootPathInBackup();
@ -570,17 +570,16 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
checkIsQueryCancelled();
auto filter_by_table_name = [my_database_info = &database_info](const String & table_name)
auto filter_by_table_name = [&](const String & table_name)
{
/// We skip inner tables of materialized views.
if (table_name.starts_with(".inner_id."))
if (BackupUtils::isInnerTable(database_name, table_name))
return false;
if (my_database_info->tables.contains(table_name))
if (database_info.tables.contains(table_name))
return true;
if (my_database_info->all_tables)
return !my_database_info->except_table_names.contains(table_name);
if (database_info.all_tables)
return !database_info.except_table_names.contains(table_name);
return false;
};

View File

@ -8,10 +8,10 @@
#include <Common/setThreadName.h>
namespace DB
namespace DB::BackupUtils
{
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements)
DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements)
{
DDLRenamingMap map;
@ -120,4 +120,15 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con
return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context);
}
/// Overload taking a qualified name: splits it into its database and table parts
/// and forwards them to the two-argument isInnerTable().
bool isInnerTable(const QualifiedTableName & table_name)
{
    const auto & database_part = table_name.database;
    const auto & table_part = table_name.table;
    return isInnerTable(database_part, table_part);
}
/// Decides by name alone whether a table is an inner table of a materialized view.
/// The database name is accepted for symmetry with the other overload but is not used.
bool isInnerTable(const String & /* database_name */, const String & table_name)
{
    /// Inner tables of materialized views are recognized by one of two name prefixes;
    /// such tables are skipped.
    if (table_name.starts_with(".inner_id."))
        return true;

    return table_name.starts_with(".inner.");
}
}

View File

@ -9,9 +9,13 @@ namespace DB
class IBackup;
class AccessRightsElements;
class DDLRenamingMap;
struct QualifiedTableName;
namespace BackupUtils
{
/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);
DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements);
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);
@ -20,4 +24,10 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements &
bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context);
bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context);
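/// Returns true if this table is an inner table of a materialized view (judged by its name).
/// Such tables are skipped while making a backup, and also while restoring a whole database from a backup.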
bool isInnerTable(const QualifiedTableName & table_name);
bool isInnerTable(const String & database_name, const String & table_name);
}
}

View File

@ -564,7 +564,7 @@ void BackupsWorker::doBackup(
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context->checkAccess(required_access);

View File

@ -124,7 +124,7 @@ void RestorerFromBackup::run(Mode mode)
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements);
renaming_map = BackupUtils::makeRenamingMap(restore_query_elements);
/// Calculate the root path in the backup for restoring, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
findRootPathsInBackup();
@ -346,12 +346,12 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup()
{
case ASTBackupQuery::ElementType::TABLE:
{
findTableInBackup({element.database_name, element.table_name}, element.partitions);
findTableInBackup({element.database_name, element.table_name}, /* skip_if_inner_table= */ false, element.partitions);
break;
}
case ASTBackupQuery::ElementType::TEMPORARY_TABLE:
{
findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions);
findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, /* skip_if_inner_table= */ false, element.partitions);
break;
}
case ASTBackupQuery::ElementType::DATABASE:
@ -370,14 +370,14 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup()
LOG_INFO(log, "Will restore {} databases and {} tables", getNumDatabases(), getNumTables());
}
void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional<ASTs> & partitions)
void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional<ASTs> & partitions)
{
schedule(
[this, table_name_in_backup, partitions]() { findTableInBackupImpl(table_name_in_backup, partitions); },
[this, table_name_in_backup, skip_if_inner_table, partitions]() { findTableInBackupImpl(table_name_in_backup, skip_if_inner_table, partitions); },
"Restore_FindTbl");
}
void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional<ASTs> & partitions)
void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional<ASTs> & partitions)
{
bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE);
@ -422,6 +422,10 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
= *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table);
}
QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup);
if (skip_if_inner_table && BackupUtils::isInnerTable(table_name))
return;
auto read_buffer = backup->readFile(*metadata_path);
String create_query_str;
readStringUntilEOF(create_query_str, *read_buffer);
@ -432,8 +436,6 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext());
String create_table_query_str = serializeAST(*create_table_query);
QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup);
bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table});
auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query);
bool table_has_data = backup->hasFiles(data_path_in_backup);
@ -568,7 +570,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i
if (except_table_names.contains({database_name_in_backup, table_name_in_backup}))
continue;
findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {});
findTableInBackup({database_name_in_backup, table_name_in_backup}, /* skip_if_inner_table= */ true, /* partitions= */ {});
}
}
@ -767,7 +769,7 @@ void RestorerFromBackup::checkDatabase(const String & database_name)
ASTPtr existing_database_def = database->getCreateDatabaseQuery();
ASTPtr database_def_from_backup = database_info.create_database_query;
if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext()))
if (!BackupUtils::compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext()))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_DATABASE,
@ -938,7 +940,7 @@ void RestorerFromBackup::checkTable(const QualifiedTableName & table_name)
{
ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context);
ASTPtr table_def_from_backup = table_info.create_table_query;
if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext()))
if (!BackupUtils::compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext()))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,

View File

@ -92,8 +92,8 @@ private:
void findRootPathsInBackup();
void findDatabasesAndTablesInBackup();
void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional<ASTs> & partitions);
void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional<ASTs> & partitions);
void findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional<ASTs> & partitions);
void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional<ASTs> & partitions);
void findDatabaseInBackup(const String & database_name_in_backup, const std::set<DatabaseAndTableName> & except_table_names);
void findDatabaseInBackupImpl(const String & database_name_in_backup, const std::set<DatabaseAndTableName> & except_table_names);
void findEverythingInBackup(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names);

View File

@ -5,33 +5,38 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Copy backups/with_broken_part.zip into the disk named "backups".
SRC_BACKUP_DIR=$CURDIR/backups
SRC_BACKUP_FILENAME=with_broken_part.zip
# Installs a predefined test backup from the "/tests/queries/0_stateless/backups/" folder onto the "backups" disk
# (by creating a symlink to it) and prints the path to the backup relative to that disk.
function install_test_backup()
{
local test_backup_filename="$1"
local test_backup_path="$CURDIR/backups/${test_backup_filename}"
BACKUPS_DISK=backups
BACKUPS_DIR=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='$BACKUPS_DISK'")
local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'")
if [ -z "$BACKUPS_DIR" ]; then
echo Disk \'$BACKUPS_DISK\' not found
exit 1
fi
if [ -z "${backups_disk_root}" ]; then
echo Disk \'${backups_disk_root}\' not found
exit 1
fi
BACKUP_FILENAME=$CLICKHOUSE_DATABASE/${SRC_BACKUP_FILENAME}
BACKUP_NAME="Disk('$BACKUPS_DISK', '$BACKUP_FILENAME')"
local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename}
mkdir -p "$(dirname "${install_path}")"
ln -s "${test_backup_path}" "${install_path}"
mkdir -p "$(dirname "$BACKUPS_DIR/$BACKUP_FILENAME")"
ln -s "$SRC_BACKUP_DIR/$SRC_BACKUP_FILENAME" "$BACKUPS_DIR/$BACKUP_FILENAME"
echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}"
}
backup_name="$(install_test_backup with_broken_part.zip)"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl"
# First try to restore with the setting `restore_broken_parts_as_detached` set to false.
$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED"
$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}')" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl"
# Then try to restore with the setting `restore_broken_parts_as_detached` set to true.
$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}'
$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}') SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}'
$CLICKHOUSE_CLIENT --multiquery <<EOF
SELECT * FROM tbl ORDER BY x;

View File

@ -1,6 +1,5 @@
#!/usr/bin/env bash
# Tags: no-ordinary-database, no-replicated-database
# Tag no-ordinary-database: TO DO
# Tags: no-replicated-database
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -0,0 +1,4 @@
RESTORED
2024-02-22 07:00:00 00
2024-02-22 07:00:01 11
2024-02-22 07:00:02 22

View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Installs a predefined test backup from the "/tests/queries/0_stateless/backups/" folder onto the "backups" disk
# by symlinking it under a subfolder named after the current test database.
# Arguments:
#   $1 - file name of the predefined backup inside "$CURDIR/backups/".
# Output:
#   prints to stdout the path of the installed backup relative to the root of the "backups" disk.
# On failure to find the disk, prints a message to stderr and exits with status 1.
function install_test_backup()
{
    local test_backup_filename="$1"
    local test_backup_path="$CURDIR/backups/${test_backup_filename}"

    local backups_disk_name=backups

    # Declared separately from the assignment so that `local` does not mask the client's exit status.
    local backups_disk_root
    backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='${backups_disk_name}'")

    if [ -z "${backups_disk_root}" ]; then
        # Report the disk's name (the root path is empty here), and use stderr because
        # callers capture stdout of this function as the backup path.
        echo "Disk '${backups_disk_name}' not found" >&2
        exit 1
    fi

    local install_path="${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename}"
    mkdir -p "$(dirname "${install_path}")"
    ln -s "${test_backup_path}" "${install_path}"

    echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}"
}
# Install the predefined backup and remember its path relative to the "backups" disk.
# NOTE(review): judging by the file name, this backup was made by an older version and
# contains metadata of a materialized view's inner table - confirm against the backup contents.
backup_name="$(install_test_backup old_backup_with_matview_inner_table_metadata.zip)"
# Make sure no tables named `mv` or `src` are left in the current database before restoring.
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"
db="$CLICKHOUSE_DATABASE"
# Restore database `mydb` from the backup under the name of the current test database.
# `grep -o` leaves only the word "RESTORED" in the output, so the reference file stays stable.
${CLICKHOUSE_CLIENT} -q "RESTORE DATABASE mydb AS ${db} FROM Disk('backups', '${backup_name}') SETTINGS allow_different_database_def=true" | grep -o "RESTORED"
# Print the contents of the restored `mv`, with timestamps rendered in UTC and ordered by them.
${CLICKHOUSE_CLIENT} -q "SELECT toDateTime(timestamp, 'UTC') AS ts, c12 FROM mv ORDER BY ts"
# Clean up the restored tables.
$CLICKHOUSE_CLIENT --query "DROP TABLE mv"
$CLICKHOUSE_CLIENT --query "DROP TABLE src"