2022-04-17 12:11:43 +00:00
|
|
|
#pragma once
|
|
|
|
|
2022-04-25 14:33:25 +00:00
|
|
|
#include <optional>
|
2023-03-05 15:15:03 +00:00
|
|
|
#include <fmt/format.h>
|
2023-03-06 14:53:58 +00:00
|
|
|
#include <base/hex.h>
|
2023-03-05 15:15:03 +00:00
|
|
|
#include <Core/Types.h>
|
2022-04-17 12:11:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2022-07-08 20:13:27 +00:00
|
|
|
class Exception;
|
2022-07-05 18:57:01 +00:00
|
|
|
enum class AccessEntityType;
|
2022-07-05 07:39:52 +00:00
|
|
|
|
2022-07-06 09:09:31 +00:00
|
|
|
/// Replicas use this class to coordinate what they're writing to a backup while executing BACKUP ON CLUSTER.
|
|
|
|
/// There are two implementation of this interface: BackupCoordinationLocal and BackupCoordinationRemote.
|
|
|
|
/// BackupCoordinationLocal is used while executing BACKUP without ON CLUSTER and performs coordination in memory.
|
|
|
|
/// BackupCoordinationRemote is used while executing BACKUP with ON CLUSTER and performs coordination via ZooKeeper.
|
2022-04-17 12:11:43 +00:00
|
|
|
class IBackupCoordination
|
|
|
|
{
|
|
|
|
public:
|
2022-05-01 13:36:32 +00:00
|
|
|
virtual ~IBackupCoordination() = default;
|
|
|
|
|
2022-07-20 19:44:51 +00:00
|
|
|
/// Sets the current stage and waits for other hosts to come to this stage too.
|
|
|
|
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
|
|
|
|
virtual void setError(const String & current_host, const Exception & exception) = 0;
|
|
|
|
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
|
|
|
|
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
|
2022-07-06 09:09:31 +00:00
|
|
|
|
2022-05-08 21:41:49 +00:00
|
|
|
struct PartNameAndChecksum
|
|
|
|
{
|
|
|
|
String part_name;
|
|
|
|
UInt128 checksum;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
|
|
|
|
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
|
2022-05-23 12:05:35 +00:00
|
|
|
/// getReplicatedPartNames().
|
2022-05-08 21:41:49 +00:00
|
|
|
/// Checksums are used only to control that parts under the same names on different replicas are the same.
|
2022-06-24 19:29:38 +00:00
|
|
|
virtual void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
|
2022-05-29 19:53:56 +00:00
|
|
|
const std::vector<PartNameAndChecksum> & part_names_and_checksums) = 0;
|
2022-05-08 21:41:49 +00:00
|
|
|
|
2022-05-29 19:53:56 +00:00
|
|
|
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
|
|
|
|
/// This is the same list as it was added by call of the function addReplicatedPartNames() but without duplications and without
|
|
|
|
/// parts covered by another parts.
|
2022-06-24 19:29:38 +00:00
|
|
|
virtual Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const = 0;
|
2022-05-23 12:05:35 +00:00
|
|
|
|
2022-07-05 07:39:52 +00:00
|
|
|
struct MutationInfo
|
|
|
|
{
|
|
|
|
String id;
|
|
|
|
String entry;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Adds information about mutations of a replicated table.
|
|
|
|
virtual void addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations) = 0;
|
|
|
|
|
2022-07-06 10:03:10 +00:00
|
|
|
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
|
|
|
|
virtual std::vector<MutationInfo> getReplicatedMutations(const String & table_shared_id, const String & replica_name) const = 0;
|
2022-07-05 07:39:52 +00:00
|
|
|
|
2022-05-23 12:05:35 +00:00
|
|
|
/// Adds a data path in backup for a replicated table.
|
|
|
|
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
|
2022-05-29 19:53:56 +00:00
|
|
|
/// getReplicatedDataPaths().
|
2022-06-24 19:29:38 +00:00
|
|
|
virtual void addReplicatedDataPath(const String & table_shared_id, const String & data_path) = 0;
|
2022-05-29 19:53:56 +00:00
|
|
|
|
|
|
|
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
|
2022-06-24 19:29:38 +00:00
|
|
|
virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0;
|
2022-05-23 12:05:35 +00:00
|
|
|
|
2022-06-29 20:44:05 +00:00
|
|
|
/// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage.
|
2022-07-05 18:57:01 +00:00
|
|
|
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) = 0;
|
|
|
|
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const = 0;
|
2022-07-06 10:03:10 +00:00
|
|
|
|
2022-04-17 12:11:43 +00:00
|
|
|
struct FileInfo
|
|
|
|
{
|
|
|
|
String file_name;
|
|
|
|
|
|
|
|
UInt64 size = 0;
|
|
|
|
UInt128 checksum{0};
|
|
|
|
|
|
|
|
/// for incremental backups
|
|
|
|
UInt64 base_size = 0;
|
|
|
|
UInt128 base_checksum{0};
|
|
|
|
|
2022-04-19 09:02:34 +00:00
|
|
|
/// Name of the data file.
|
|
|
|
String data_file_name;
|
|
|
|
|
2022-04-17 12:11:43 +00:00
|
|
|
/// Suffix of an archive if the backup is stored as a series of archives.
|
|
|
|
String archive_suffix;
|
|
|
|
|
|
|
|
/// Position in the archive.
|
|
|
|
UInt64 pos_in_archive = static_cast<UInt64>(-1);
|
2023-03-05 15:15:03 +00:00
|
|
|
|
|
|
|
/// Note: this format doesn't allow to parse data back
|
|
|
|
/// It is useful only for debugging purposes
|
|
|
|
[[ maybe_unused ]] String describe()
|
|
|
|
{
|
|
|
|
String result;
|
|
|
|
result += fmt::format("file_name: {};\n", file_name);
|
|
|
|
result += fmt::format("size: {};\n", size);
|
|
|
|
result += fmt::format("checksum: {};\n", getHexUIntLowercase(checksum));
|
|
|
|
result += fmt::format("base_size: {};\n", base_size);
|
|
|
|
result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum));
|
|
|
|
result += fmt::format("data_file_name: {};\n", data_file_name);
|
|
|
|
result += fmt::format("archive_suffix: {};\n", archive_suffix);
|
|
|
|
result += fmt::format("pos_in_archive: {};\n", pos_in_archive);
|
|
|
|
return result;
|
|
|
|
}
|
2022-04-17 12:11:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Adds file information.
|
2022-04-19 09:02:34 +00:00
|
|
|
/// If specified checksum+size are new for this IBackupContentsInfo the function sets `is_data_file_required`.
|
|
|
|
virtual void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) = 0;
|
2022-04-17 12:11:43 +00:00
|
|
|
|
|
|
|
void addFileInfo(const FileInfo & file_info)
|
|
|
|
{
|
2022-04-19 09:02:34 +00:00
|
|
|
bool is_data_file_required;
|
|
|
|
addFileInfo(file_info, is_data_file_required);
|
2022-04-17 12:11:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Updates some fields (currently only `archive_suffix`) of a stored file's information.
|
|
|
|
virtual void updateFileInfo(const FileInfo & file_info) = 0;
|
|
|
|
|
2022-04-19 18:15:27 +00:00
|
|
|
virtual std::vector<FileInfo> getAllFileInfos() const = 0;
|
2022-06-06 09:50:20 +00:00
|
|
|
virtual Strings listFiles(const String & directory, bool recursive) const = 0;
|
|
|
|
virtual bool hasFiles(const String & directory) const = 0;
|
2022-04-17 12:11:43 +00:00
|
|
|
|
2022-04-19 09:02:34 +00:00
|
|
|
using SizeAndChecksum = std::pair<UInt64, UInt128>;
|
|
|
|
|
2022-04-19 18:15:27 +00:00
|
|
|
virtual std::optional<FileInfo> getFileInfo(const String & file_name) const = 0;
|
|
|
|
virtual std::optional<FileInfo> getFileInfo(const SizeAndChecksum & size_and_checksum) const = 0;
|
2022-04-17 12:11:43 +00:00
|
|
|
|
|
|
|
/// Generates a new archive suffix, e.g. "001", "002", "003", ...
|
|
|
|
virtual String getNextArchiveSuffix() = 0;
|
|
|
|
|
|
|
|
/// Returns the list of all the archive suffixes which were generated.
|
2022-04-19 18:15:27 +00:00
|
|
|
virtual Strings getAllArchiveSuffixes() const = 0;
|
2023-02-10 11:04:05 +00:00
|
|
|
|
|
|
|
/// This function is used to check if concurrent backups are running
|
|
|
|
/// other than the backup passed to the function
|
2023-02-16 08:30:27 +00:00
|
|
|
virtual bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const = 0;
|
2022-04-17 12:11:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|