2018-03-21 20:21:34 +00:00
|
|
|
#pragma once
|
2020-02-27 16:47:40 +00:00
|
|
|
#include <map>
|
|
|
|
#include <optional>
|
|
|
|
#include <city.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
2020-02-27 16:47:40 +00:00
|
|
|
#include <Disks/IDisk.h>
|
2018-03-21 20:21:34 +00:00
|
|
|
#include <IO/ReadBuffer.h>
|
|
|
|
#include <IO/WriteBuffer.h>
|
|
|
|
|
|
|
|
|
|
|
|
class SipHash;
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Checksum of one file.
|
|
|
|
struct MergeTreeDataPartChecksum
|
|
|
|
{
|
|
|
|
using uint128 = CityHash_v1_0_2::uint128;
|
|
|
|
|
|
|
|
UInt64 file_size {};
|
|
|
|
uint128 file_hash {};
|
|
|
|
|
|
|
|
bool is_compressed = false;
|
|
|
|
UInt64 uncompressed_size {};
|
|
|
|
uint128 uncompressed_hash {};
|
|
|
|
|
|
|
|
MergeTreeDataPartChecksum() {}
|
|
|
|
MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_) : file_size(file_size_), file_hash(file_hash_) {}
|
|
|
|
MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_, UInt64 uncompressed_size_, uint128 uncompressed_hash_)
|
|
|
|
: file_size(file_size_), file_hash(file_hash_), is_compressed(true),
|
|
|
|
uncompressed_size(uncompressed_size_), uncompressed_hash(uncompressed_hash_) {}
|
|
|
|
|
|
|
|
void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const;
|
2020-02-27 16:47:40 +00:00
|
|
|
void checkSize(const DiskPtr & disk, const String & path) const;
|
2018-03-21 20:21:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/** Checksums of all non-temporary files.
|
|
|
|
* For compressed files, the check sum and the size of the decompressed data are stored to not depend on the compression method.
|
|
|
|
*/
|
|
|
|
struct MergeTreeDataPartChecksums
|
|
|
|
{
|
|
|
|
using Checksum = MergeTreeDataPartChecksum;
|
|
|
|
|
|
|
|
/// The order is important.
|
|
|
|
using FileChecksums = std::map<String, Checksum>;
|
|
|
|
FileChecksums files;
|
|
|
|
|
|
|
|
void addFile(const String & file_name, UInt64 file_size, Checksum::uint128 file_hash);
|
|
|
|
|
|
|
|
void add(MergeTreeDataPartChecksums && rhs_checksums);
|
|
|
|
|
2021-02-10 14:12:49 +00:00
|
|
|
bool has(const String & file_name) const { return files.find(file_name) != files.end(); }
|
|
|
|
|
2018-03-21 20:21:34 +00:00
|
|
|
bool empty() const
|
|
|
|
{
|
|
|
|
return files.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Checks that the set of columns and their checksums are the same. If not, throws an exception.
|
|
|
|
/// If have_uncompressed, for compressed files it compares the checksums of the decompressed data.
|
|
|
|
/// Otherwise, it compares only the checksums of the files.
|
|
|
|
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const;
|
|
|
|
|
2018-03-21 23:30:20 +00:00
|
|
|
static bool isBadChecksumsErrorCode(int code);
|
|
|
|
|
2018-03-21 20:21:34 +00:00
|
|
|
/// Checks that the directory contains all the needed files of the correct size. Does not check the checksum.
|
2020-02-27 16:47:40 +00:00
|
|
|
void checkSizes(const DiskPtr & disk, const String & path) const;
|
2018-03-21 20:21:34 +00:00
|
|
|
|
|
|
|
/// Returns false if the checksum is too old.
|
|
|
|
bool read(ReadBuffer & in);
|
|
|
|
/// Assume that header with version (the first line) is read
|
|
|
|
bool read(ReadBuffer & in, size_t format_version);
|
2020-03-23 02:12:31 +00:00
|
|
|
bool readV2(ReadBuffer & in);
|
|
|
|
bool readV3(ReadBuffer & in);
|
|
|
|
bool readV4(ReadBuffer & from);
|
2018-03-21 20:21:34 +00:00
|
|
|
|
2020-03-09 01:50:33 +00:00
|
|
|
void write(WriteBuffer & to) const;
|
2018-03-21 20:21:34 +00:00
|
|
|
|
|
|
|
/// Checksum from the set of checksums of .bin files (for deduplication).
|
|
|
|
void computeTotalChecksumDataOnly(SipHash & hash) const;
|
|
|
|
|
2018-05-21 13:49:54 +00:00
|
|
|
/// SipHash of all all files hashes represented as hex string
|
|
|
|
String getTotalChecksumHex() const;
|
|
|
|
|
2021-02-10 14:12:49 +00:00
|
|
|
Checksum::uint128 getTotalChecksumUInt128() const;
|
|
|
|
|
2018-03-21 23:30:20 +00:00
|
|
|
String getSerializedString() const;
|
2018-03-21 20:21:34 +00:00
|
|
|
static MergeTreeDataPartChecksums deserializeFrom(const String & s);
|
2018-07-08 03:56:24 +00:00
|
|
|
|
|
|
|
UInt64 getTotalSizeOnDisk() const;
|
2018-03-21 20:21:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// A kind of MergeTreeDataPartChecksums intended to be stored in ZooKeeper (to save its RAM)
|
2018-10-03 17:10:23 +00:00
|
|
|
/// MinimalisticDataPartChecksums and MergeTreeDataPartChecksums have the same serialization format
|
2018-03-22 21:20:20 +00:00
|
|
|
/// for versions less than MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS.
|
2018-03-21 20:21:34 +00:00
|
|
|
struct MinimalisticDataPartChecksums
|
|
|
|
{
|
|
|
|
UInt64 num_compressed_files = 0;
|
|
|
|
UInt64 num_uncompressed_files = 0;
|
|
|
|
|
|
|
|
using uint128 = MergeTreeDataPartChecksum::uint128;
|
|
|
|
uint128 hash_of_all_files {};
|
|
|
|
uint128 hash_of_uncompressed_files {};
|
|
|
|
uint128 uncompressed_hash_of_compressed_files {};
|
|
|
|
|
2018-09-11 14:41:04 +00:00
|
|
|
bool operator==(const MinimalisticDataPartChecksums & other) const
|
|
|
|
{
|
|
|
|
return num_compressed_files == other.num_compressed_files
|
|
|
|
&& num_uncompressed_files == other.num_uncompressed_files
|
|
|
|
&& hash_of_all_files == other.hash_of_all_files
|
|
|
|
&& hash_of_uncompressed_files == other.hash_of_uncompressed_files
|
|
|
|
&& uncompressed_hash_of_compressed_files == other.uncompressed_hash_of_compressed_files;
|
|
|
|
}
|
|
|
|
|
2018-03-21 20:21:34 +00:00
|
|
|
/// Is set only for old formats
|
2018-12-11 13:30:20 +00:00
|
|
|
std::optional<MergeTreeDataPartChecksums> full_checksums;
|
2018-03-21 20:21:34 +00:00
|
|
|
|
|
|
|
static constexpr size_t MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS = 5;
|
|
|
|
|
|
|
|
MinimalisticDataPartChecksums() = default;
|
|
|
|
void computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums);
|
|
|
|
|
|
|
|
bool deserialize(ReadBuffer & in);
|
2018-12-11 13:30:20 +00:00
|
|
|
void deserializeWithoutHeader(ReadBuffer & in);
|
2018-03-21 20:21:34 +00:00
|
|
|
static MinimalisticDataPartChecksums deserializeFrom(const String & s);
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & to) const;
|
2018-12-11 13:30:20 +00:00
|
|
|
void serializeWithoutHeader(WriteBuffer & to) const;
|
2020-04-22 06:22:14 +00:00
|
|
|
String getSerializedString() const;
|
2018-03-21 20:21:34 +00:00
|
|
|
static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic);
|
|
|
|
|
2018-12-11 13:30:20 +00:00
|
|
|
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
|
|
|
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
|
|
|
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
2018-03-21 20:21:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}
|