Less number of "stat" calls for each INSERT into MergeTree #2281

This commit is contained in:
Alexey Milovidov 2018-07-08 06:56:24 +03:00
parent 3445c78190
commit a2faa511cc
6 changed files with 27 additions and 9 deletions

View File

@ -1314,7 +1314,7 @@ void MergeTreeData::AlterDataPartTransaction::commit()
file.remove();
}
mutable_part.bytes_on_disk = MergeTreeData::DataPart::calculateTotalSizeOnDisk(path);
mutable_part.bytes_on_disk = new_checksums.getTotalSizeOnDisk();
/// TODO: we can skip resetting caches when the column is added.
data_part->storage.context.dropCaches();

View File

@ -479,8 +479,6 @@ void MergeTreeDataPart::loadIndex()
index.assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end()));
}
bytes_on_disk = calculateTotalSizeOnDisk(getFullPath());
}
void MergeTreeDataPart::loadPartitionAndMinMaxIndex()
@ -514,16 +512,25 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex()
/// Loads checksums.txt from the part directory into `checksums` and sets `bytes_on_disk`.
/// `require` - when true, a missing checksums.txt raises NO_FILE_IN_DATA_PART.
/// NOTE(review): this span looks like a rendered diff hunk with the +/- markers stripped, so lines
/// removed by the commit and lines added by it appear interleaved below; it is presumably not
/// compilable exactly as shown - confirm against the repository before relying on it.
void MergeTreeDataPart::loadChecksums(bool require)
{
String path = getFullPath() + "checksums.txt";
if (!Poco::File(path).exists())
Poco::File checksums_file(path);
if (checksums_file.exists())
{
ReadBufferFromFile file = openForReading(path);
if (checksums.read(file))
{
assertEOF(file);
/// Size is derived from the loaded checksum entries (sum of their file_size fields).
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
else
/// checksums.read() returned false: fall back to calculateTotalSizeOnDisk() on the part path
/// (presumably inspects the files on disk - verify in its definition).
bytes_on_disk = calculateTotalSizeOnDisk(getFullPath());
}
else
{
if (require)
throw Exception("No checksums.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
return;
/// No checksums.txt and it is not required: size comes from calculateTotalSizeOnDisk().
bytes_on_disk = calculateTotalSizeOnDisk(getFullPath());
}
ReadBufferFromFile file = openForReading(path);
if (checksums.read(file))
assertEOF(file);
}
void MergeTreeDataPart::loadRowsCount()

View File

@ -90,6 +90,7 @@ struct MergeTreeDataPart
size_t marks_count = 0;
std::atomic<UInt64> bytes_on_disk {0}; /// 0 - if not counted;
/// Is used from several threads without locks (it is changed with ALTER).
/// May not contain size of checksums.txt and columns.txt
time_t modification_time = 0;
/// When the part is removed from the working set. Changes once.
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };

View File

@ -87,6 +87,14 @@ void MergeTreeDataPartChecksums::checkSizes(const String & path) const
}
}
/// Returns the sum of `file_size` over every entry stored in `files`.
/// No filesystem access is performed here; the value reflects only what the checksum entries record.
UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const
{
    UInt64 total_bytes = 0;

    for (const auto & name_and_checksum : files)
    {
        total_bytes += name_and_checksum.second.file_size;
    }

    return total_bytes;
}
bool MergeTreeDataPartChecksums::read(ReadBuffer & in, size_t format_version)
{
switch (format_version)

View File

@ -84,6 +84,8 @@ struct MergeTreeDataPartChecksums
String getSerializedString() const;
static MergeTreeDataPartChecksums deserializeFrom(const String & s);
UInt64 getTotalSizeOnDisk() const;
};

View File

@ -341,7 +341,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
new_part->columns = *total_column_list;
new_part->index.assign(std::make_move_iterator(index_columns.begin()), std::make_move_iterator(index_columns.end()));
new_part->checksums = checksums;
new_part->bytes_on_disk = MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_part->getFullPath());
new_part->bytes_on_disk = checksums.getTotalSizeOnDisk();
}
void MergedBlockOutputStream::init()