mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-10-04 23:50:50 +00:00
polymorphic parts (development)
This commit is contained in:
parent
e1d13ea5b0
commit
426c62aafe
@ -291,9 +291,14 @@ void IMergeTreeDataPartWriter::calculateAndSerializeSkipIndices(
|
||||
void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums)
|
||||
{
|
||||
std::cerr << "finishPrimaryIndexSerialization called...\n";
|
||||
|
||||
bool write_final_mark = (with_final_mark && data_written);
|
||||
if (write_final_mark && compute_granularity)
|
||||
index_granularity.appendMark(0);
|
||||
|
||||
if (index_stream)
|
||||
{
|
||||
if (with_final_mark && data_written)
|
||||
if (write_final_mark)
|
||||
{
|
||||
for (size_t j = 0; j < index_columns.size(); ++j)
|
||||
{
|
||||
@ -301,9 +306,6 @@ void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::Da
|
||||
index_types[j]->serializeBinary(last_index_row[j], *index_stream);
|
||||
}
|
||||
|
||||
if (compute_granularity)
|
||||
index_granularity.appendMark(0);
|
||||
|
||||
last_index_row.clear();
|
||||
}
|
||||
|
||||
|
@ -65,32 +65,6 @@ static bool arrayHasNoElementsRead(const IColumn & column)
|
||||
return last_offset != 0;
|
||||
}
|
||||
|
||||
/// Obtains the marks for the file at `mrk_path`.
/// When a mark cache is configured it is consulted first; `load_func` produces the marks otherwise.
/// Throws LOGICAL_ERROR if the result is still null after all attempts.
IMergeTreeReader::MarksPtr IMergeTreeReader::loadMarks(const String & mrk_path, const LoadFunc & load_func)
{
    MarksPtr loaded;

    if (!mark_cache)
    {
        /// No cache configured: the loader is the only source.
        loaded = load_func();
    }
    else
    {
        auto cache_key = mark_cache->hash(mrk_path);

        if (settings.save_marks_in_cache)
        {
            /// The loader is handed to the cache (presumably invoked by it on a miss).
            loaded = mark_cache->getOrSet(cache_key, load_func);
        }
        else
        {
            /// Look the entry up, and on a miss load it directly without going through getOrSet.
            loaded = mark_cache->get(cache_key);
            if (!loaded)
                loaded = load_func();
        }
    }

    if (!loaded)
        throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);

    return loaded;
}
|
||||
|
||||
|
||||
void IMergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, bool & should_evaluate_missing_defaults, size_t num_rows)
|
||||
{
|
||||
|
@ -53,15 +53,10 @@ public:
|
||||
return all_mark_ranges.back().begin;
|
||||
}
|
||||
|
||||
using MarksPtr = MarkCache::MappedPtr;
|
||||
|
||||
MergeTreeData::DataPartPtr data_part;
|
||||
|
||||
protected:
|
||||
|
||||
using LoadFunc = std::function<MarksPtr()>;
|
||||
MarksPtr loadMarks(const String & mrk_path, const LoadFunc & load_func);
|
||||
|
||||
/// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size.
|
||||
ValueSizeMap avg_value_size_hints;
|
||||
/// Stores states for IDataType::deserializeBinaryBulk
|
||||
|
@ -154,8 +154,6 @@ void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart:
|
||||
writeIntBinary(stream->plain_hashing.count(), stream->marks);
|
||||
writeIntBinary(stream->compressed.offset(), stream->marks);
|
||||
}
|
||||
if (compute_granularity)
|
||||
index_granularity.appendMark(0);
|
||||
}
|
||||
|
||||
stream->finalize();
|
||||
|
51
dbms/src/Storages/MergeTree/MergeTreeMarksLoader.cpp
Normal file
51
dbms/src/Storages/MergeTree/MergeTreeMarksLoader.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <Storages/MergeTree/MergeTreeMarksLoader.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
MergeTreeMarksLoader::MergeTreeMarksLoader(
|
||||
MarkCache * mark_cache_,
|
||||
const String & mrk_path_,
|
||||
const LoadFunc & load_func_,
|
||||
bool save_marks_in_cache_,
|
||||
size_t columns_num_)
|
||||
: mark_cache(mark_cache_)
|
||||
, mrk_path(mrk_path_)
|
||||
, load_func(load_func_)
|
||||
, save_marks_in_cache(save_marks_in_cache_)
|
||||
, columns_num(columns_num_) {}
|
||||
|
||||
/// Returns the mark at (`row_index`, `column_index`).
///
/// The marks are loaded on the first call (when `marks` is still empty).
/// They are addressed as a flat array with `columns_num` entries per row,
/// so the element returned is `row_index * columns_num + column_index`.
///
/// Throws LOGICAL_ERROR if `column_index` is not less than `columns_num`.
/// `row_index` is not range-checked here.
const MarkInCompressedFile & MergeTreeMarksLoader::getMark(size_t row_index, size_t column_index)
{
    if (!marks)
        loadMarks();

    /// A column outside the row would silently address a mark of another row.
    if (column_index >= columns_num)
        throw Exception("Column index " + std::to_string(column_index)
            + " is out of range: marks have " + std::to_string(columns_num)
            + " column(s) per row in " + mrk_path, ErrorCodes::LOGICAL_ERROR);

    return (*marks)[row_index * columns_num + column_index];
}
|
||||
|
||||
void MergeTreeMarksLoader::loadMarks()
|
||||
{
|
||||
if (mark_cache)
|
||||
{
|
||||
auto key = mark_cache->hash(mrk_path);
|
||||
if (save_marks_in_cache)
|
||||
{
|
||||
marks = mark_cache->getOrSet(key, load_func);
|
||||
}
|
||||
else
|
||||
{
|
||||
marks = mark_cache->get(key);
|
||||
if (!marks)
|
||||
marks = load_func();
|
||||
}
|
||||
}
|
||||
else
|
||||
marks = load_func();
|
||||
|
||||
if (!marks)
|
||||
throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
}
|
35
dbms/src/Storages/MergeTree/MergeTreeMarksLoader.h
Normal file
35
dbms/src/Storages/MergeTree/MergeTreeMarksLoader.h
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma once

#include <functional>

#include <Storages/MarkCache.h>

namespace DB
{

/// Lazily provides marks of a marks file.
///
/// The loader only stores how to obtain the marks (an optional cache, the file path
/// and a loading callback); the marks are fetched on the first getMark() call.
/// Marks are addressed as a flat array with `columns_num` entries per row.
class MergeTreeMarksLoader
{
public:
    using MarksPtr = MarkCache::MappedPtr;
    /// Callback that produces the marks when they are not taken from the cache.
    using LoadFunc = std::function<MarksPtr()>;

    /// Creates an unconfigured loader: no cache, empty path, empty callback.
    MergeTreeMarksLoader() = default;

    /// `mark_cache_` may be null, in which case `load_func_` is always used.
    /// `save_marks_in_cache_` selects between getOrSet() and get() on the cache.
    /// `columns_num_` is the number of marks stored per row.
    MergeTreeMarksLoader(MarkCache * mark_cache_,
        const String & mrk_path_,
        const LoadFunc & load_func_,
        bool save_marks_in_cache_,
        size_t columns_num_ = 1);

    /// Returns the mark at (row_index, column_index), loading the marks first if needed.
    const MarkInCompressedFile & getMark(size_t row_index, size_t column_index = 0);

    /// True once the marks have actually been loaded.
    bool initialized() const { return marks != nullptr; }

private:
    MarkCache * mark_cache = nullptr;
    String mrk_path;
    LoadFunc load_func;
    bool save_marks_in_cache = false;
    /// Initialized so that a default-constructed loader never reads an indeterminate value;
    /// the value matches the default of the constructor argument.
    size_t columns_num = 1;
    MarksPtr marks;

    void loadMarks();
};

}
|
@ -19,6 +19,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(const MergeTreeData::DataPartPtr
|
||||
, uncompressed_cache_, mark_cache_, mark_ranges_
|
||||
, settings_, avg_value_size_hints_)
|
||||
{
|
||||
initMarksLoader();
|
||||
size_t buffer_size = settings.max_read_buffer_size;
|
||||
|
||||
if (uncompressed_cache)
|
||||
@ -121,13 +122,14 @@ void MergeTreeReaderCompact::readData(
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeReaderCompact::loadMarks()
|
||||
void MergeTreeReaderCompact::initMarksLoader()
|
||||
{
|
||||
const auto & index_granularity_info = data_part->index_granularity_info;
|
||||
size_t marks_count = data_part->getMarksCount();
|
||||
std::string mrk_path = index_granularity_info.getMarksFilePath(path + NAME_OF_FILE_WITH_DATA);
|
||||
size_t columns_num = data_part->columns.size();
|
||||
|
||||
auto load_func = [&]() -> MarkCache::MappedPtr
|
||||
auto load = [&]() -> MarkCache::MappedPtr
|
||||
{
|
||||
size_t file_size = Poco::File(mrk_path).getSize();
|
||||
|
||||
@ -140,7 +142,6 @@ void MergeTreeReaderCompact::loadMarks()
|
||||
/// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
|
||||
auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
|
||||
|
||||
size_t columns_num = data_part->columns.size();
|
||||
|
||||
auto res = std::make_shared<MarksInCompressedFile>(marks_count * columns_num);
|
||||
|
||||
@ -168,25 +169,14 @@ void MergeTreeReaderCompact::loadMarks()
|
||||
return res;
|
||||
};
|
||||
|
||||
std::cerr << "(MergeTreeReaderCompact::loadMarks) table: " << storage.getTableName() << ", part: " << path << "\n";
|
||||
std::cerr << "(MergeTreeReaderCompact::loadMarks) start marks load..." << "\n";
|
||||
|
||||
auto marks_array = IMergeTreeReader::loadMarks(mrk_path, load_func);
|
||||
marks = MarksInCompressedFileCompact(marks_array, columns.size());
|
||||
marks_loader = MergeTreeMarksLoader{mark_cache, mrk_path, load, settings.save_marks_in_cache, columns_num};
|
||||
|
||||
std::cerr << "(MergeTreeReaderCompact::loadMarks) end marks load..." << "\n";
|
||||
}
|
||||
|
||||
const MarkInCompressedFile & MergeTreeReaderCompact::getMark(size_t row, size_t col)
|
||||
void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index)
|
||||
{
|
||||
if (!marks.initialized())
|
||||
loadMarks();
|
||||
return marks.getMark(row, col);
|
||||
}
|
||||
|
||||
void MergeTreeReaderCompact::seekToMark(size_t row, size_t col)
|
||||
{
|
||||
MarkInCompressedFile mark = getMark(row, col);
|
||||
MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index);
|
||||
|
||||
std::cerr << "(MergeTreeReaderCompact::seekToMark) mark: (" << mark.offset_in_compressed_file << ", " << mark.offset_in_decompressed_block << "\n";
|
||||
|
||||
@ -201,7 +191,7 @@ void MergeTreeReaderCompact::seekToMark(size_t row, size_t col)
|
||||
{
|
||||
/// Better diagnostics.
|
||||
if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
|
||||
e.addMessage("(while seeking to mark (" + toString(row) + ", " + toString(col) + ")");
|
||||
e.addMessage("(while seeking to mark (" + toString(row_index) + ", " + toString(column_index) + ")");
|
||||
|
||||
throw;
|
||||
}
|
||||
|
@ -8,38 +8,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class MarksInCompressedFileCompact
|
||||
{
|
||||
public:
|
||||
using MarksPtr = MarkCache::MappedPtr;
|
||||
|
||||
MarksInCompressedFileCompact() = default;
|
||||
|
||||
MarksInCompressedFileCompact(const MarksPtr & data_, size_t columns_num_)
|
||||
: data(data_), columns_num(columns_num_) {}
|
||||
|
||||
const MarkInCompressedFile & getMark(size_t index, size_t column) const
|
||||
{
|
||||
return (*data)[index * columns_num + column];
|
||||
}
|
||||
|
||||
char * getRowAddress(size_t index) const
|
||||
{
|
||||
return reinterpret_cast<char *>(data->data() + index * columns_num);
|
||||
}
|
||||
|
||||
size_t getRowSize() const
|
||||
{
|
||||
return sizeof(MarkInCompressedFile) * columns_num;
|
||||
}
|
||||
|
||||
bool initialized() { return data != nullptr; }
|
||||
|
||||
private:
|
||||
MarksPtr data;
|
||||
size_t columns_num;
|
||||
};
|
||||
|
||||
/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
|
||||
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
|
||||
/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
|
||||
@ -63,9 +31,9 @@ private:
|
||||
std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
|
||||
std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
|
||||
|
||||
MarksInCompressedFileCompact marks;
|
||||
MergeTreeMarksLoader marks_loader;
|
||||
|
||||
void loadMarks();
|
||||
void initMarksLoader();
|
||||
void seekToStart();
|
||||
void seekToMark(size_t row, size_t col);
|
||||
const MarkInCompressedFile & getMark(size_t row, size_t col);
|
||||
|
@ -32,6 +32,8 @@ MergeTreeReaderStream::MergeTreeReaderStream(
|
||||
|
||||
/// Care should be taken to not load marks when the part is empty (marks_count == 0).
|
||||
|
||||
initMarksLoader();
|
||||
|
||||
for (const auto & mark_range : all_mark_ranges)
|
||||
{
|
||||
size_t left_mark = mark_range.begin;
|
||||
@ -41,10 +43,10 @@ MergeTreeReaderStream::MergeTreeReaderStream(
|
||||
/// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
|
||||
|
||||
/// If the end of range is inside the block, we will need to read it too.
|
||||
if (right_mark < marks_count && getMark(right_mark).offset_in_decompressed_block > 0)
|
||||
if (right_mark < marks_count && marks_loader.getMark(right_mark).offset_in_decompressed_block > 0)
|
||||
{
|
||||
while (right_mark < marks_count
|
||||
&& getMark(right_mark).offset_in_compressed_file == getMark(mark_range.end).offset_in_compressed_file)
|
||||
&& marks_loader.getMark(right_mark).offset_in_compressed_file == marks_loader.getMark(mark_range.end).offset_in_compressed_file)
|
||||
{
|
||||
++right_mark;
|
||||
}
|
||||
@ -55,13 +57,13 @@ MergeTreeReaderStream::MergeTreeReaderStream(
|
||||
/// If there are no marks after the end of range, just use file size
|
||||
if (right_mark >= marks_count
|
||||
|| (right_mark + 1 == marks_count
|
||||
&& getMark(right_mark).offset_in_compressed_file == getMark(mark_range.end).offset_in_compressed_file))
|
||||
&& marks_loader.getMark(right_mark).offset_in_compressed_file == marks_loader.getMark(mark_range.end).offset_in_compressed_file))
|
||||
{
|
||||
mark_range_bytes = file_size - (left_mark < marks_count ? getMark(left_mark).offset_in_compressed_file : 0);
|
||||
mark_range_bytes = file_size - (left_mark < marks_count ? marks_loader.getMark(left_mark).offset_in_compressed_file : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
mark_range_bytes = getMark(right_mark).offset_in_compressed_file - getMark(left_mark).offset_in_compressed_file;
|
||||
mark_range_bytes = marks_loader.getMark(right_mark).offset_in_compressed_file - marks_loader.getMark(left_mark).offset_in_compressed_file;
|
||||
}
|
||||
|
||||
max_mark_range_bytes = std::max(max_mark_range_bytes, mark_range_bytes);
|
||||
@ -101,16 +103,11 @@ MergeTreeReaderStream::MergeTreeReaderStream(
|
||||
}
|
||||
|
||||
|
||||
const MarkInCompressedFile & MergeTreeReaderStream::getMark(size_t index)
|
||||
void MergeTreeReaderStream::initMarksLoader()
|
||||
{
|
||||
if (!marks)
|
||||
loadMarks();
|
||||
return (*marks)[index];
|
||||
}
|
||||
if (marks_loader.initialized())
|
||||
return;
|
||||
|
||||
|
||||
void MergeTreeReaderStream::loadMarks()
|
||||
{
|
||||
std::string mrk_path = index_granularity_info->getMarksFilePath(path_prefix);
|
||||
|
||||
auto load = [&]() -> MarkCache::MappedPtr
|
||||
@ -153,31 +150,13 @@ void MergeTreeReaderStream::loadMarks()
|
||||
return res;
|
||||
};
|
||||
|
||||
if (mark_cache)
|
||||
{
|
||||
auto key = mark_cache->hash(mrk_path);
|
||||
if (save_marks_in_cache)
|
||||
{
|
||||
marks = mark_cache->getOrSet(key, load);
|
||||
}
|
||||
else
|
||||
{
|
||||
marks = mark_cache->get(key);
|
||||
if (!marks)
|
||||
marks = load();
|
||||
}
|
||||
}
|
||||
else
|
||||
marks = load();
|
||||
|
||||
if (!marks)
|
||||
throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
|
||||
marks_loader = MergeTreeMarksLoader{mark_cache, mrk_path, load, save_marks_in_cache};
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeReaderStream::seekToMark(size_t index)
|
||||
{
|
||||
MarkInCompressedFile mark = getMark(index);
|
||||
MarkInCompressedFile mark = marks_loader.getMark(index);
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Compression/CachedCompressedReadBuffer.h>
|
||||
#include <Compression/CompressedReadBufferFromFile.h>
|
||||
#include <Storages/MergeTree/MergeTreeReaderSettings.h>
|
||||
#include <Storages/MergeTree/MergeTreeMarksLoader.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -33,7 +34,7 @@ private:
|
||||
/// NOTE: lazily loads marks from the marks cache.
|
||||
const MarkInCompressedFile & getMark(size_t index);
|
||||
|
||||
void loadMarks();
|
||||
void initMarksLoader();
|
||||
|
||||
std::string path_prefix;
|
||||
std::string data_file_extension;
|
||||
@ -48,5 +49,7 @@ private:
|
||||
|
||||
std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
|
||||
std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
|
||||
|
||||
MergeTreeMarksLoader marks_loader;
|
||||
};
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user