#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/CompressedStream.h>
#include <IO/CompressedReadBuffer.h>
#include <IO/CompressedWriteBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/HashingWriteBuffer.h>
#include <Core/Defines.h>
#include <Common/SipHash.h>
#include <Common/escapeForFileName.h>
#include <Common/StringUtils.h>
#include <Storages/MergeTree/MergeTreeDataPart.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Columns/ColumnNullable.h>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/DirectoryIterator.h>
#include <common/logger_useful.h>
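
/// Each entry of a .mrk file stores two offsets (position in the compressed .bin file and offset
/// inside the decompressed block), hence two size_t values per mark.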
#define MERGE_TREE_MARK_SIZE (2 * sizeof(size_t))


namespace DB
{

namespace ErrorCodes
{
    extern const int CHECKSUM_DOESNT_MATCH;
    extern const int FILE_DOESNT_EXIST;
    extern const int NO_FILE_IN_DATA_PART;
    extern const int EXPECTED_END_OF_FILE;
    extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
    extern const int CORRUPTED_DATA;
    extern const int FORMAT_VERSION_TOO_OLD;
    extern const int UNKNOWN_FORMAT;
    extern const int UNEXPECTED_FILE_IN_DATA_PART;
}
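
/// Checks that this checksum describes the same data as `rhs`. If both sides carry uncompressed
/// checksums, only those are compared; otherwise the size and hash of the file as stored on disk
/// are compared.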
void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const
{
    if (is_compressed && have_uncompressed)
    {
        if (!rhs.is_compressed)
            throw Exception("No uncompressed checksum for file " + name, ErrorCodes::CHECKSUM_DOESNT_MATCH);
        if (rhs.uncompressed_size != uncompressed_size)
            throw Exception("Unexpected uncompressed size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
        if (rhs.uncompressed_hash != uncompressed_hash)
            throw Exception("Checksum mismatch for uncompressed file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH);
        return;
    }
    if (rhs.file_size != file_size)
        throw Exception("Unexpected size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
    if (rhs.file_hash != file_hash)
        throw Exception("Checksum mismatch for file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH);
}

void MergeTreeDataPartChecksum::checkSize(const String & path) const
{
    Poco::File file(path);
    if (!file.exists())
        throw Exception(path + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST);
    size_t size = file.getSize();
    if (size != file_size)
        throw Exception(path + " has unexpected size: " + toString(size) + " instead of " + toString(file_size),
            ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
}
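
/// Two-way comparison: first make sure `rhs` has no files that we don't know about,
/// then check that every file we have is present in `rhs` with a matching checksum.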
void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const
{
    for (const auto & it : rhs.files)
    {
        const String & name = it.first;

        if (!files.count(name))
            throw Exception("Unexpected file " + name + " in data part", ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART);
    }

    for (const auto & it : files)
    {
        const String & name = it.first;

        auto jt = rhs.files.find(name);
        if (jt == rhs.files.end())
            throw Exception("No file " + name + " in data part", ErrorCodes::NO_FILE_IN_DATA_PART);

        it.second.checkEqual(jt->second, have_uncompressed, name);
    }
}

void MergeTreeDataPartChecksums::checkSizes(const String & path) const
{
    for (const auto & it : files)
    {
        const String & name = it.first;
        it.second.checkSize(path + name);
    }
}
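
/// Reads the checksums file. The header line "checksums format version: N" selects the parser;
/// returns false for the obsolete version 1 (no checksums loaded), true otherwise.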
bool MergeTreeDataPartChecksums::read(ReadBuffer & in)
{
    files.clear();

    assertString("checksums format version: ", in);
    int format_version;
    readText(format_version, in);
    assertChar('\n', in);

    switch (format_version)
    {
        case 1:
            return false;
        case 2:
            return read_v2(in);
        case 3:
            return read_v3(in);
        case 4:
            return read_v4(in);
        default:
            throw Exception("Bad checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT);
    }
}
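
/// Version 2 is a human-readable text format. Schematically (values are illustrative,
/// the per-file fields are tab-indented):
///
///     checksums format version: 2
///     1 files:
///     data.bin
///         size: 12345
///         hash: 17063807472029648292 14406383945238604611
///         compressed: 1
///         uncompressed size: 23456
///         uncompressed hash: 2846980604190931278 4632628101908279489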
bool MergeTreeDataPartChecksums::read_v2(ReadBuffer & in)
{
    size_t count;

    readText(count, in);
    assertString(" files:\n", in);

    for (size_t i = 0; i < count; ++i)
    {
        String name;
        Checksum sum;

        readString(name, in);
        assertString("\n\tsize: ", in);
        readText(sum.file_size, in);
        assertString("\n\thash: ", in);
        readText(sum.file_hash.first, in);
        assertString(" ", in);
        readText(sum.file_hash.second, in);
        assertString("\n\tcompressed: ", in);
        readText(sum.is_compressed, in);
        if (sum.is_compressed)
        {
            assertString("\n\tuncompressed size: ", in);
            readText(sum.uncompressed_size, in);
            assertString("\n\tuncompressed hash: ", in);
            readText(sum.uncompressed_hash.first, in);
            assertString(" ", in);
            readText(sum.uncompressed_hash.second, in);
        }
        assertChar('\n', in);

        files.insert(std::make_pair(name, sum));
    }

    return true;
}
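
/// Version 3 is a compact binary format: varint-encoded count and sizes, length-prefixed names,
/// raw 128-bit hashes.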
bool MergeTreeDataPartChecksums::read_v3(ReadBuffer & in)
{
    size_t count;

    readVarUInt(count, in);

    for (size_t i = 0; i < count; ++i)
    {
        String name;
        Checksum sum;

        readBinary(name, in);
        readVarUInt(sum.file_size, in);
        readPODBinary(sum.file_hash, in);
        readBinary(sum.is_compressed, in);

        if (sum.is_compressed)
        {
            readVarUInt(sum.uncompressed_size, in);
            readPODBinary(sum.uncompressed_hash, in);
        }

        files.emplace(std::move(name), sum);
    }

    return true;
}

bool MergeTreeDataPartChecksums::read_v4(ReadBuffer & from)
{
    CompressedReadBuffer in{from};
    return read_v3(in);
}
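
/// Always writes the latest format (version 4): a plain-text header line followed by the
/// version 3 binary payload compressed with LZ4.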
void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
{
    writeString("checksums format version: 4\n", to);

    CompressedWriteBuffer out{to, CompressionSettings(CompressionMethod::LZ4), 1 << 16};
    writeVarUInt(files.size(), out);

    for (const auto & it : files)
    {
        const String & name = it.first;
        const Checksum & sum = it.second;

        writeBinary(name, out);
        writeVarUInt(sum.file_size, out);
        writePODBinary(sum.file_hash, out);
        writeBinary(sum.is_compressed, out);

        if (sum.is_compressed)
        {
            writeVarUInt(sum.uncompressed_size, out);
            writePODBinary(sum.uncompressed_hash, out);
        }
    }
}

void MergeTreeDataPartChecksums::addFile(const String & file_name, size_t file_size, MergeTreeDataPartChecksum::uint128 file_hash)
{
    files[file_name] = Checksum(file_size, file_hash);
}

void MergeTreeDataPartChecksums::add(MergeTreeDataPartChecksums && rhs_checksums)
{
    for (auto & checksum : rhs_checksums.files)
        files[std::move(checksum.first)] = std::move(checksum.second);

    rhs_checksums.files.clear();
}

/// Checksum computed from the set of checksums of the .bin files.
void MergeTreeDataPartChecksums::summaryDataChecksum(SipHash & hash) const
{
    /// We use the fact that iteration over files is in deterministic (lexicographical) order.
    for (const auto & it : files)
    {
        const String & name = it.first;
        const Checksum & sum = it.second;

        if (!endsWith(name, ".bin"))
            continue;

        size_t len = name.size();
        hash.update(reinterpret_cast<const char *>(&len), sizeof(len));
        hash.update(name.data(), len);
        hash.update(reinterpret_cast<const char *>(&sum.uncompressed_size), sizeof(sum.uncompressed_size));
        hash.update(reinterpret_cast<const char *>(&sum.uncompressed_hash), sizeof(sum.uncompressed_hash));
    }
}

String MergeTreeDataPartChecksums::toString() const
{
    WriteBufferFromOwnString out;
    write(out);
    return out.str();
}

MergeTreeDataPartChecksums MergeTreeDataPartChecksums::parse(const String & s)
{
    ReadBufferFromString in(s);
    MergeTreeDataPartChecksums res;
    if (!res.read(in))
        throw Exception("Checksums format is too old", ErrorCodes::FORMAT_VERSION_TOO_OLD);
    assertEOF(in);
    return res;
}
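
/// Returns the checksum of the .bin file of column `name`, or nullptr if the part has no
/// checksums or no such file.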
const MergeTreeDataPartChecksums::Checksum * MergeTreeDataPart::tryGetBinChecksum(const String & name) const
{
    if (checksums.empty())
        return nullptr;

    const auto & files = checksums.files;
    const auto bin_file_name = escapeForFileName(name) + ".bin";
    auto it = files.find(bin_file_name);

    return (it == files.end()) ? nullptr : &it->second;
}
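
/// Opens a file for reading with a buffer no larger than the file itself (most of the files
/// read here are tiny, so this avoids allocating the full default buffer size).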
static ReadBufferFromFile openForReading(const String & path)
{
    return ReadBufferFromFile(path, std::min(static_cast<Poco::File::FileSize>(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize()));
}
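
/// Reads the per-column min/max values from the minmax_<column>.idx files of the part
/// (each file stores the serialized minimum followed by the maximum).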
void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & storage, const String & part_path)
{
    size_t minmax_idx_size = storage.minmax_idx_column_types.size();
    min_values.resize(minmax_idx_size);
    max_values.resize(minmax_idx_size);
    for (size_t i = 0; i < minmax_idx_size; ++i)
    {
        String file_name = part_path + "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx";
        ReadBufferFromFile file = openForReading(file_name);
        const DataTypePtr & type = storage.minmax_idx_column_types[i];
        type->deserializeBinary(min_values[i], file);
        type->deserializeBinary(max_values[i], file);
    }
    initialized = true;
}
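
/// Writes one minmax_<column>.idx file per minmax index column and records its size and hash
/// in the part checksums.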
void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const
{
    for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i)
    {
        String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx";
        const DataTypePtr & type = storage.minmax_idx_column_types[i];

        WriteBufferFromFile out(part_path + file_name);
        HashingWriteBuffer out_hashing(out);
        type->serializeBinary(min_values[i], out_hashing);
        type->serializeBinary(max_values[i], out_hashing);
        out_hashing.next();
        checksums.files[file_name].file_size = out_hashing.count();
        checksums.files[file_name].file_hash = out_hashing.getHash();
    }
}

void MergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & column_names)
{
    if (!initialized)
    {
        min_values.resize(column_names.size());
        max_values.resize(column_names.size());
    }

    for (size_t i = 0; i < column_names.size(); ++i)
    {
        Field min_value;
        Field max_value;
        const ColumnWithTypeAndName & column = block.getByName(column_names[i]);
        column.column->getExtremes(min_value, max_value);

        if (!initialized)
        {
            min_values[i] = Field(min_value);
            max_values[i] = Field(max_value);
        }
        else
        {
            min_values[i] = std::min(min_values[i], min_value);
            max_values[i] = std::max(max_values[i], max_value);
        }
    }

    initialized = true;
}

void MergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other)
{
    if (!other.initialized)
        return;

    if (!initialized)
    {
        min_values.assign(other.min_values);
        max_values.assign(other.max_values);
        initialized = true;
    }
    else
    {
        for (size_t i = 0; i < min_values.size(); ++i)
        {
            min_values[i] = std::min(min_values[i], other.min_values[i]);
            max_values[i] = std::max(max_values[i], other.max_values[i]);
        }
    }
}

MergeTreeDataPart::MergeTreeDataPart(MergeTreeData & storage_, const String & name_)
    : storage(storage_), name(name_), info(MergeTreePartInfo::fromPartName(name_, storage.format_version))
{
}

/// Returns the size of the .bin file for column `name` if found, zero otherwise.
size_t MergeTreeDataPart::getColumnCompressedSize(const String & name) const
{
    const Checksum * checksum = tryGetBinChecksum(name);

    /// Probably a logical error; it is not clear whether this can happen at all if checksums are not empty.
    return checksum ? checksum->file_size : 0;
}

size_t MergeTreeDataPart::getColumnUncompressedSize(const String & name) const
{
    const Checksum * checksum = tryGetBinChecksum(name);
    return checksum ? checksum->uncompressed_size : 0;
}

/** Returns the name of a column with the minimum compressed size (as returned by getColumnCompressedSize()).
  * If no checksums are present, returns the name of the first physically existing column.
  */
String MergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const
{
    const auto & columns = storage.getColumnsList();
    const std::string * minimum_size_column = nullptr;
    size_t minimum_size = std::numeric_limits<size_t>::max();

    for (const auto & column : columns)
    {
        if (!hasColumnFiles(column.name))
            continue;

        const auto size = getColumnCompressedSize(column.name);
        if (size < minimum_size)
        {
            minimum_size = size;
            minimum_size_column = &column.name;
        }
    }

    if (!minimum_size_column)
        throw Exception("Could not find a column of minimum size in MergeTree, part " + getFullPath(), ErrorCodes::LOGICAL_ERROR);

    return *minimum_size_column;
}

String MergeTreeDataPart::getFullPath() const
{
    if (relative_path.empty())
        throw Exception("Part relative_path cannot be empty. This is a bug.", ErrorCodes::LOGICAL_ERROR);

    return storage.full_path + relative_path + "/";
}

String MergeTreeDataPart::getNameWithPrefix() const
{
    String res = Poco::Path(relative_path).getFileName();

    if (res.empty())
        throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR);

    return res;
}

DayNum_t MergeTreeDataPart::getMinDate() const
{
    if (storage.minmax_idx_date_column_pos != -1)
        return DayNum_t(minmax_idx.min_values[storage.minmax_idx_date_column_pos].get<UInt64>());
    else
        return DayNum_t();
}

DayNum_t MergeTreeDataPart::getMaxDate() const
{
    if (storage.minmax_idx_date_column_pos != -1)
        return DayNum_t(minmax_idx.max_values[storage.minmax_idx_date_column_pos].get<UInt64>());
    else
        return DayNum_t();
}
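
/// A temporary part ("tmp*" directory) is removed in the destructor; the name check guards
/// against accidentally deleting a directory that does not look like a temporary part.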
MergeTreeDataPart::~MergeTreeDataPart()
{
    if (is_temp)
    {
        try
        {
            std::string path = getFullPath();

            Poco::File dir(path);
            if (!dir.exists())
                return;

            if (!startsWith(getNameWithPrefix(), "tmp"))
            {
                LOG_ERROR(storage.log, "~DataPart() should remove part " << path
                    << " but its name doesn't start with tmp. Too suspicious, keeping the part.");
                return;
            }

            dir.remove(true);
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }
}

size_t MergeTreeDataPart::calcTotalSize(const String & from)
{
    Poco::File cur(from);
    if (cur.isFile())
        return cur.getSize();
    std::vector<std::string> files;
    cur.list(files);
    size_t res = 0;
    for (const auto & file : files)
        res += calcTotalSize(from + file);
    return res;
}
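
/// Removal is done in two steps: the part directory is first renamed to "tmp_delete_<name>" and
/// only then deleted, so the part is never observed in a half-removed state under its own name.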
void MergeTreeDataPart::remove() const
{
    if (relative_path.empty())
        throw Exception("Part relative_path cannot be empty. This is a bug.", ErrorCodes::LOGICAL_ERROR);

    String from = storage.full_path + relative_path;
    String to = storage.full_path + "tmp_delete_" + name;

    Poco::File from_dir{from};
    Poco::File to_dir{to};

    if (to_dir.exists())
    {
        LOG_WARNING(storage.log, "Directory " << to << " (to which the part must be renamed before removing) already exists."
            " Most likely this is due to an unclean restart. Removing it.");

        try
        {
            to_dir.remove(true);
        }
        catch (...)
        {
            LOG_ERROR(storage.log, "Cannot remove directory " << to << ". Check owner and access rights.");
            throw;
        }
    }

    try
    {
        from_dir.renameTo(to);
    }
    catch (const Poco::FileNotFoundException &)
    {
        LOG_ERROR(storage.log, "Directory " << from << " (part to remove) doesn't exist or one of the nested files has gone."
            " Most likely this is due to manual removal, which should be discouraged. Ignoring.");

        return;
    }

    to_dir.remove(true);
}

void MergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const
{
    String from = getFullPath();
    String to = storage.full_path + new_relative_path + "/";

    Poco::File from_file(from);
    if (!from_file.exists())
        throw Exception("Part directory " + from + " doesn't exist. Most likely this is a logical error.", ErrorCodes::FILE_DOESNT_EXIST);

    Poco::File to_file(to);
    if (to_file.exists())
    {
        if (remove_new_dir_if_exists)
        {
            Names files;
            Poco::File(from).list(files);

            LOG_WARNING(storage.log, "Part directory " << to << " already exists"
                << " and contains " << files.size() << " files. Removing it.");

            to_file.remove(true);
        }
        else
        {
            throw Exception("Part directory " + to + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);
        }
    }

    from_file.setLastModified(Poco::Timestamp::fromEpochTime(time(nullptr)));
    from_file.renameTo(to);
    relative_path = new_relative_path;
}

void MergeTreeDataPart::renameAddPrefix(bool to_detached, const String & prefix) const
{
    unsigned try_no = 0;
    auto dst_name = [&, this] { return (to_detached ? "detached/" : "") + prefix + name + (try_no ? "_try" + DB::toString(try_no) : ""); };

    if (to_detached)
    {
        /** If the part needs to be detached and the directory we want to rename it to already exists,
          * we will rename it to a directory whose name gets a suffix of the form "_tryN".
          * This is done only in the case of `to_detached`, because it is assumed that in this case the exact name does not matter.
          * No more than 10 attempts are made so that not too many junk directories are left behind.
          */
        while (try_no < 10 && Poco::File(storage.full_path + dst_name()).exists())
        {
            LOG_WARNING(storage.log, "Directory " << dst_name() << " (to detach to) already exists."
                " Will detach to a directory with a '_tryN' suffix.");
            ++try_no;
        }
    }

    renameTo(dst_name());
}
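
/// Loads the in-memory metadata of the part from disk: the column list, the checksums, the
/// primary key index, the row count and the partition/minmax indexes, then optionally verifies
/// that the files on disk are consistent with that metadata.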
void MergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency)
{
    loadColumns(require_columns_checksums);
    loadChecksums(require_columns_checksums);
    loadIndex();
    loadRowsCount(); /// Must be called after loadIndex() as it uses the value of `marks_count`.
    loadPartitionAndMinMaxIndex();
    if (check_consistency)
        checkConsistency(require_columns_checksums);
}
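
/// Loads primary.idx into memory. The index holds one value of every primary key column per mark,
/// so each index column ends up with exactly `marks_count` entries.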
void MergeTreeDataPart::loadIndex()
{
    if (!marks_count)
    {
        if (columns.empty())
            throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);

        marks_count = Poco::File(getFullPath() + escapeForFileName(columns.front().name) + ".mrk")
            .getSize() / MERGE_TREE_MARK_SIZE;
    }

    size_t key_size = storage.sort_descr.size();

    if (key_size)
    {
        index.clear();
        index.resize(key_size);

        for (size_t i = 0; i < key_size; ++i)
        {
            index[i] = storage.primary_key_data_types[i]->createColumn();
            index[i]->reserve(marks_count);
        }

        String index_path = getFullPath() + "primary.idx";
        ReadBufferFromFile index_file = openForReading(index_path);

        for (size_t i = 0; i < marks_count; ++i)
            for (size_t j = 0; j < key_size; ++j)
                storage.primary_key_data_types[j]->deserializeBinary(*index[j].get(), index_file);

        for (size_t i = 0; i < key_size; ++i)
            if (index[i]->size() != marks_count)
                throw Exception("Cannot read all data from index file " + index_path
                    + " (expected size: " + toString(marks_count) + ", read: " + toString(index[i]->size()) + ")",
                    ErrorCodes::CANNOT_READ_ALL_DATA);

        if (!index_file.eof())
            throw Exception("Index file " + index_path + " is unexpectedly long", ErrorCodes::EXPECTED_END_OF_FILE);
    }

    size_in_bytes = calcTotalSize(getFullPath());
}
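
/// For parts of the old (monthly-partitioned) format the partition and min/max dates are derived
/// from the part name; for the custom-partitioning format they are read from partition.dat and
/// the minmax_*.idx files. In both cases the result is cross-checked against the partition ID
/// encoded in the part name.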
void MergeTreeDataPart::loadPartitionAndMinMaxIndex()
{
    if (storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        DayNum_t min_date;
        DayNum_t max_date;
        MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date);

        const auto & date_lut = DateLUT::instance();
        partition = MergeTreePartition(date_lut.toNumYYYYMM(min_date));
        minmax_idx = MinMaxIndex(min_date, max_date);
    }
    else
    {
        String full_path = getFullPath();
        partition.load(storage, full_path);
        minmax_idx.load(storage, full_path);
    }

    String calculated_partition_id = partition.getID(storage);
    if (calculated_partition_id != info.partition_id)
        throw Exception(
            "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id
            + " differs from partition ID in part name: " + info.partition_id,
            ErrorCodes::CORRUPTED_DATA);
}

void MergeTreeDataPart::loadChecksums(bool require)
{
    String path = getFullPath() + "checksums.txt";
    if (!Poco::File(path).exists())
    {
        if (require)
            throw Exception("No checksums.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);

        return;
    }
    ReadBufferFromFile file = openForReading(path);
    if (checksums.read(file))
        assertEOF(file);
}
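
/// For parts of the custom-partitioning format the exact row count is stored in count.txt.
/// For old-format parts it is reconstructed from the on-disk size of a fixed-size, non-nullable
/// column and validated against the approximate count implied by the marks.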
void MergeTreeDataPart::loadRowsCount()
{
    if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        String path = getFullPath() + "count.txt";
        if (!Poco::File(path).exists())
            throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);

        ReadBufferFromFile file = openForReading(path);
        readIntText(rows_count, file);
        assertEOF(file);
    }
    else
    {
        size_t rows_approx = storage.index_granularity * marks_count;

        for (const NameAndTypePair & column : columns)
        {
            ColumnPtr column_col = column.type->createColumn();
            const auto checksum = tryGetBinChecksum(column.name);

            /// The column must be fixed-size and non-nullable.
            if (!checksum || !column_col->isFixed() || column_col->isNullable())
                continue;

            size_t sizeof_field = column_col->sizeOfField();
            rows_count = checksum->uncompressed_size / sizeof_field;

            if (checksum->uncompressed_size % sizeof_field != 0)
            {
                throw Exception(
                    "Column " + column.name + " has indivisible uncompressed size " + toString(checksum->uncompressed_size)
                    + ", sizeof " + toString(sizeof_field),
                    ErrorCodes::LOGICAL_ERROR);
            }

            if (!(rows_count <= rows_approx && rows_approx < rows_count + storage.index_granularity))
                throw Exception(
                    "Unexpected size of column " + column.name + ": " + toString(rows_count) + " rows",
                    ErrorCodes::LOGICAL_ERROR);

            return;
        }

        throw Exception("Data part doesn't contain a fixed size column (not even the Date column)", ErrorCodes::LOGICAL_ERROR);
    }
}

void MergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const
{
    std::shared_lock<std::shared_mutex> part_lock(columns_lock);
    for (const NameAndTypePair & column : *storage.columns)
        if (Poco::File(getFullPath() + escapeForFileName(column.name) + ".bin").exists())
            column_to_size[column.name] += Poco::File(getFullPath() + escapeForFileName(column.name) + ".bin").getSize();
}
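
/// Reads the column list from columns.txt. If the file is missing (and not required), it is
/// regenerated from the table's column list, keeping only the columns whose .bin files exist,
/// and written atomically via a .tmp file followed by a rename.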
void MergeTreeDataPart::loadColumns(bool require)
{
    String path = getFullPath() + "columns.txt";
    if (!Poco::File(path).exists())
    {
        if (require)
            throw Exception("No columns.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);

        /// If there is no file with a list of columns, write it down.
        for (const NameAndTypePair & column : storage.getColumnsList())
            if (Poco::File(getFullPath() + escapeForFileName(column.name) + ".bin").exists())
                columns.push_back(column);

        if (columns.empty())
            throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);

        {
            WriteBufferFromFile out(path + ".tmp", 4096);
            columns.writeText(out);
        }
        Poco::File(path + ".tmp").renameTo(path);

        return;
    }

    ReadBufferFromFile file = openForReading(path);
    columns.readText(file);
}
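
/// Sanity checks after loading: if checksums are present, verifies that all expected files
/// (primary.idx, per-column .mrk/.bin, count.txt, partition.dat, minmax_*.idx) are listed and
/// that on-disk sizes match; otherwise falls back to checking that the key files exist, are
/// non-empty and that all mark files have the same size.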
void MergeTreeDataPart::checkConsistency(bool require_part_metadata)
{
    String path = getFullPath();

    if (!checksums.empty())
    {
        if (!storage.sort_descr.empty() && !checksums.files.count("primary.idx"))
            throw Exception("No checksum for primary.idx", ErrorCodes::NO_FILE_IN_DATA_PART);

        if (require_part_metadata)
        {
            for (const NameAndTypePair & it : columns)
            {
                String name = escapeForFileName(it.name);
                if (!checksums.files.count(name + ".mrk") ||
                    !checksums.files.count(name + ".bin"))
                    throw Exception("No .mrk or .bin file checksum for column " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
            }
        }

        if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
        {
            if (!checksums.files.count("count.txt"))
                throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART);

            if (storage.partition_expr && !checksums.files.count("partition.dat"))
                throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART);

            for (const String & col_name : storage.minmax_idx_columns)
            {
                if (!checksums.files.count("minmax_" + escapeForFileName(col_name) + ".idx"))
                    throw Exception("No minmax idx file checksum for column " + col_name, ErrorCodes::NO_FILE_IN_DATA_PART);
            }
        }

        checksums.checkSizes(path);
    }
    else
    {
        auto check_file_not_empty = [&path](const String & file_path)
        {
            Poco::File file(file_path);
            if (!file.exists() || file.getSize() == 0)
                throw Exception("Part " + path + " is broken: " + file_path + " is empty", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
            return file.getSize();
        };

        /// Check that the primary key index is not empty.
        if (!storage.sort_descr.empty())
            check_file_not_empty(path + "primary.idx");

        if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
        {
            check_file_not_empty(path + "count.txt");

            if (storage.partition_expr)
                check_file_not_empty(path + "partition.dat");

            for (const String & col_name : storage.minmax_idx_columns)
                check_file_not_empty(path + "minmax_" + escapeForFileName(col_name) + ".idx");
        }

        /// Check that all marks are nonempty and have the same size.
        auto check_marks = [&path](const NamesAndTypesList & columns, const std::string & extension)
        {
            ssize_t marks_size = -1;
            for (const NameAndTypePair & it : columns)
            {
                Poco::File marks_file(path + escapeForFileName(it.name) + extension);

                /// When you add a new column to the table, the .mrk files are not created. We will not delete anything.
                if (!marks_file.exists())
                    continue;

                if (marks_size == -1)
                {
                    marks_size = marks_file.getSize();

                    if (0 == marks_size)
                        throw Exception("Part " + path + " is broken: " + marks_file.path() + " is empty.",
                            ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
                }
                else
                {
                    if (static_cast<ssize_t>(marks_file.getSize()) != marks_size)
                        throw Exception("Part " + path + " is broken: marks have different sizes.",
                            ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
                }
            }
        };

        check_marks(columns, ".mrk");
        check_marks(columns, ".null.mrk");
    }
}

bool MergeTreeDataPart::hasColumnFiles(const String & column) const
{
    String prefix = getFullPath();
    String escaped_column = escapeForFileName(column);
    return Poco::File(prefix + escaped_column + ".bin").exists() &&
        Poco::File(prefix + escaped_column + ".mrk").exists();
}

size_t MergeTreeDataPart::getIndexSizeInBytes() const
{
    size_t res = 0;
    for (const ColumnPtr & column : index)
        res += column->byteSize();
    return res;
}

size_t MergeTreeDataPart::getIndexSizeInAllocatedBytes() const
{
    size_t res = 0;
    for (const ColumnPtr & column : index)
        res += column->allocatedBytes();
    return res;
}

String MergeTreeDataPart::stateToString(MergeTreeDataPart::State state)
{
    switch (state)
    {
        case State::Temporary:
            return "Temporary";
        case State::PreCommitted:
            return "PreCommitted";
        case State::Committed:
            return "Committed";
        case State::Outdated:
            return "Outdated";
        case State::Deleting:
            return "Deleting";
        default:
            throw Exception("Unknown part state " + std::to_string(static_cast<int>(state)), ErrorCodes::LOGICAL_ERROR);
    }
}

String MergeTreeDataPart::stateString() const
{
    return stateToString(state);
}
}