mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-10-05 08:00:51 +00:00
polymorphic parts (development) columns sizes
This commit is contained in:
parent
be0e13d28f
commit
31ffad0fb0
@ -366,6 +366,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
|
||||
loadChecksums(require_columns_checksums);
|
||||
loadIndexGranularity();
|
||||
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
|
||||
loadColumnSizes();
|
||||
loadRowsCount(); /// Must be called after loadIndex() as it uses the value of `index_granularity`.
|
||||
loadPartitionAndMinMaxIndex();
|
||||
loadTTLInfos();
|
||||
@ -588,6 +589,28 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
||||
sample_block.insert({it.type, it.name});
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::loadColumnSizes()
|
||||
{
|
||||
size_t columns_num = columns.size();
|
||||
|
||||
if (columns_num == 0)
|
||||
throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
|
||||
|
||||
auto column_sizes_path = getFullPath() + "columns_sizes.txt";
|
||||
auto columns_sizes_file = Poco::File(column_sizes_path);
|
||||
if (!columns_sizes_file.exists())
|
||||
{
|
||||
LOG_WARNING(storage.log, "No file column_sizes.txt in part " + name);
|
||||
return;
|
||||
}
|
||||
|
||||
ReadBufferFromFile buffer(column_sizes_path, columns_sizes_file.getSize());
|
||||
auto it = columns.begin();
|
||||
for (size_t i = 0; i < columns_num; ++i, ++it)
|
||||
readPODBinary(columns_sizes[it->name], buffer);
|
||||
assertEOF(buffer);
|
||||
}
|
||||
|
||||
|
||||
UInt64 IMergeTreeDataPart::calculateTotalSizeOnDisk(const String & from)
|
||||
{
|
||||
@ -638,6 +661,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_
|
||||
relative_path = new_relative_path;
|
||||
}
|
||||
|
||||
|
||||
void IMergeTreeDataPart::remove() const
|
||||
{
|
||||
if (!isStoredOnDisk())
|
||||
|
@ -299,6 +299,8 @@ public:
|
||||
*/
|
||||
mutable std::mutex alter_mutex;
|
||||
|
||||
ColumnSizeByName columns_sizes;
|
||||
|
||||
/// For data in RAM ('index')
|
||||
UInt64 getIndexSizeInBytes() const;
|
||||
UInt64 getIndexSizeInAllocatedBytes() const;
|
||||
@ -317,6 +319,7 @@ public:
|
||||
static UInt64 calculateTotalSizeOnDisk(const String & from);
|
||||
|
||||
protected:
|
||||
|
||||
void removeIfNeeded();
|
||||
virtual void checkConsistency(bool require_part_metadata) const;
|
||||
|
||||
@ -344,6 +347,8 @@ private:
|
||||
|
||||
void loadPartitionAndMinMaxIndex();
|
||||
|
||||
void loadColumnSizes();
|
||||
|
||||
String getRelativePathForDetachedPart(const String & prefix) const;
|
||||
};
|
||||
|
||||
|
@ -88,6 +88,11 @@ public:
|
||||
return Columns(std::make_move_iterator(index_columns.begin()), std::make_move_iterator(index_columns.end()));
|
||||
}
|
||||
|
||||
    /// Read-only access to the per-column size statistics accumulated by the
    /// writer (filled in during serialization; see `columns_sizes` member).
    const MergeTreeData::ColumnSizeByName & getColumnsSizes() const
    {
        return columns_sizes;
    }
|
||||
|
||||
void initSkipIndices();
|
||||
void initPrimaryIndex();
|
||||
void calculateAndSerializePrimaryIndex(const Block & primary_index_block, size_t rows);
|
||||
@ -143,6 +148,8 @@ protected:
|
||||
bool data_written = false;
|
||||
bool primary_index_initialized = false;
|
||||
bool skip_indices_initialized = false;
|
||||
|
||||
MergeTreeData::ColumnSizeByName columns_sizes;
|
||||
};
|
||||
|
||||
using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
|
||||
|
@ -186,28 +186,6 @@ void MergeTreeDataPartCompact::loadIndexGranularity()
|
||||
index_granularity.setInitialized();
|
||||
}
|
||||
|
||||
void MergeTreeDataPartCompact::loadColumnSizes()
|
||||
{
|
||||
size_t columns_num = columns.size();
|
||||
|
||||
if (columns_num == 0)
|
||||
throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
|
||||
|
||||
auto column_sizes_path = getFullPath() + "columns_sizes.txt";
|
||||
auto columns_sizes_file = Poco::File(column_sizes_path);
|
||||
if (!columns_sizes_file.exists())
|
||||
{
|
||||
LOG_WARNING(storage.log, "No file column_sizes.txt in part " + name);
|
||||
return;
|
||||
}
|
||||
|
||||
ReadBufferFromFile buffer(column_sizes_path, columns_sizes_file.getSize());
|
||||
auto it = columns.begin();
|
||||
for (size_t i = 0; i < columns_num; ++i, ++it)
|
||||
readPODBinary(columns_sizes[it->name], buffer);
|
||||
assertEOF(buffer);
|
||||
}
|
||||
|
||||
void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata)
|
||||
{
|
||||
UNUSED(require_part_metadata);
|
||||
|
@ -83,12 +83,7 @@ private:
|
||||
/// Loads marks index granularity into memory
|
||||
void loadIndexGranularity() override;
|
||||
|
||||
void loadColumnSizes();
|
||||
|
||||
|
||||
void checkConsistency(bool require_part_metadata);
|
||||
|
||||
ColumnSizeByName columns_sizes;
|
||||
};
|
||||
|
||||
|
||||
|
@ -104,11 +104,7 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block)
|
||||
|
||||
writeIntBinary(rows_to_write, stream->marks);
|
||||
for (const auto & it : columns_list)
|
||||
{
|
||||
writeIntBinary(stream->plain_hashing.count(), stream->marks);
|
||||
writeIntBinary(stream->compressed.offset(), stream->marks);
|
||||
next_row = writeColumnSingleGranule(block.getByName(it.name), current_row, rows_to_write);
|
||||
}
|
||||
|
||||
++from_mark;
|
||||
current_row = next_row;
|
||||
@ -125,6 +121,13 @@ size_t MergeTreeDataPartWriterCompact::writeColumnSingleGranule(const ColumnWith
|
||||
std::cerr << "(writeColumnSingleGranule) from_row: " << from_row << "\n";
|
||||
std::cerr << "(writeColumnSingleGranule) number_of_rows: " << number_of_rows << "\n";
|
||||
|
||||
/// FIXME compressed size does not work
|
||||
size_t old_compressed_size = stream->compressed_buf.getCompressedBytes() + stream->plain_hashing.count();
|
||||
size_t old_uncompressed_size = stream->compressed.count();
|
||||
|
||||
writeIntBinary(stream->plain_hashing.count(), stream->marks);
|
||||
writeIntBinary(stream->compressed.offset(), stream->marks);
|
||||
|
||||
IDataType::SerializeBinaryBulkStatePtr state;
|
||||
IDataType::SerializeBinaryBulkSettings serialize_settings;
|
||||
|
||||
@ -136,6 +139,12 @@ size_t MergeTreeDataPartWriterCompact::writeColumnSingleGranule(const ColumnWith
|
||||
column.type->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state);
|
||||
column.type->serializeBinaryBulkStateSuffix(serialize_settings, state);
|
||||
|
||||
/// FIXME compressed size does not work
|
||||
size_t compressed_size = stream->compressed_buf.getCompressedBytes() + stream->plain_hashing.count();
|
||||
size_t uncompressed_size = stream->compressed.count();
|
||||
|
||||
columns_sizes[column.name].add(ColumnSize{0, compressed_size - old_compressed_size, uncompressed_size - old_uncompressed_size});
|
||||
|
||||
return from_row + number_of_rows;
|
||||
}
|
||||
|
||||
@ -155,6 +164,10 @@ void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart:
|
||||
}
|
||||
}
|
||||
|
||||
size_t marks_size = stream->marks.count();
|
||||
for (auto it = columns_sizes.begin(); it != columns_sizes.end(); ++it)
|
||||
it->second.marks = marks_size;
|
||||
|
||||
stream->finalize();
|
||||
if (sync)
|
||||
stream->sync();
|
||||
|
@ -132,6 +132,22 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
|
||||
checksums.files["ttl.txt"].file_hash = out_hashing.getHash();
|
||||
}
|
||||
|
||||
const auto & columns_sizes = writer->getColumnsSizes();
|
||||
if (!columns_sizes.empty())
|
||||
{
|
||||
WriteBufferFromFile out(part_path + "columns_sizes.txt", 4096);
|
||||
HashingWriteBuffer out_hashing(out);
|
||||
for (const auto & column : columns_list)
|
||||
{
|
||||
auto it = columns_sizes.find(column.name);
|
||||
if (it == columns_sizes.end())
|
||||
throw Exception("Not found size for column " + column.name, ErrorCodes::LOGICAL_ERROR);
|
||||
writePODBinary(it->second, out_hashing);
|
||||
checksums.files["columns_sizes.txt"].file_size = out_hashing.count();
|
||||
checksums.files["columns_sizes.txt"].file_hash = out_hashing.getHash();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/// Write a file with a description of columns.
|
||||
WriteBufferFromFile out(part_path + "columns.txt", 4096);
|
||||
@ -151,6 +167,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
|
||||
new_part->checksums = checksums;
|
||||
new_part->bytes_on_disk = checksums.getTotalSizeOnDisk();
|
||||
new_part->index_granularity = writer->getIndexGranularity();
|
||||
new_part->columns_sizes = columns_sizes;
|
||||
std::cerr << "(writeSuffixAndFinalizePart) part: " << new_part->getFullPath() << "\n";
|
||||
std::cerr << "(writeSuffixAndFinalizePart) marks_count: " << new_part->index_granularity.getMarksCount() << "\n";
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user