Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-19 14:11:58 +00:00)
Add several ifs for format_version, but have to move this routine to a single class
commit 0a64ef5de8
parent 6f5fef5344
@@ -257,7 +257,7 @@ public:
 
     /// Size of memory, allocated for column.
     /// This is greater or equals to byteSize due to memory reservation in containers.
-    /// Zero, if could be determined.
+    /// Zero, if could not be determined.
    virtual size_t allocatedBytes() const = 0;
 
    /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
@@ -17,11 +17,12 @@ struct MarkInCompressedFile
 {
     size_t offset_in_compressed_file;
     size_t offset_in_decompressed_block;
+    size_t index_granularity;
 
     bool operator==(const MarkInCompressedFile & rhs) const
     {
-        return std::tie(offset_in_compressed_file, offset_in_decompressed_block)
-            == std::tie(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block);
+        return std::tie(offset_in_compressed_file, offset_in_decompressed_block, index_granularity)
+            == std::tie(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block, rhs.index_granularity);
     }
     bool operator!=(const MarkInCompressedFile & rhs) const
     {
@@ -30,7 +31,7 @@ struct MarkInCompressedFile
 
     String toString() const
     {
-        return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")";
+        return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + "," + DB::toString(index_granularity) + ")";
     }
 };
 
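For reference, the two on-disk mark layouts this change distinguishes: a fixed-granularity .mrk entry holds two UInt64 values, while an adaptive-granularity .mrk2 entry adds the per-mark index_granularity as a third. Below is a minimal, self-contained sketch; it is not code from the commit, and ClickHouse serializes the fields individually rather than writing structs, but the per-mark sizes match the constants introduced here.

```cpp
#include <cstdint>
#include <cstdio>

struct MarkFixed            // .mrk  : 2 * sizeof(UInt64) = 16 bytes per mark
{
    uint64_t offset_in_compressed_file;
    uint64_t offset_in_decompressed_block;
};

struct MarkAdaptive         // .mrk2 : 3 * sizeof(UInt64) = 24 bytes per mark
{
    uint64_t offset_in_compressed_file;
    uint64_t offset_in_decompressed_block;
    uint64_t index_granularity;   // rows covered by this mark
};

int main()
{
    static_assert(sizeof(MarkFixed) == 2 * sizeof(uint64_t), "fixed mark must be 16 bytes");
    static_assert(sizeof(MarkAdaptive) == 3 * sizeof(uint64_t), "adaptive mark must be 24 bytes");
    std::printf("fixed mark: %zu bytes, adaptive mark: %zu bytes\n",
                sizeof(MarkFixed), sizeof(MarkAdaptive));
}
```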
@@ -114,7 +114,6 @@ MergeTreeData::MergeTreeData(
     data_parts_by_info(data_parts_indexes.get<TagByInfo>()),
     data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
 {
-    std::cerr << "LOADING PART\n";
     /// NOTE: using the same columns list as is read when performing actual merges.
     merging_params.check(getColumns().getAllPhysical());
 
@@ -23,7 +23,10 @@
 
 #include <common/logger_useful.h>
 
-#define MERGE_TREE_MARK_SIZE (2 * sizeof(UInt64))
+namespace {
+    constexpr auto MERGE_TREE_MARK_SIZE_FIXED_INDEX_GRANULARITY = 2 * sizeof(UInt64);
+    constexpr auto MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY = 3 * sizeof(UInt64);
+}
 
 
 namespace DB
@@ -195,10 +198,12 @@ String MergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const
 
     for (const auto & column : columns)
     {
+        std::cerr << "Searching for column:" << column.name << std::endl;
         if (!hasColumnFiles(column.name))
             continue;
 
         const auto size = getColumnSize(column.name, *column.type).data_compressed;
+        std::cerr << "Column size:" <<size<<std::endl;
         if (size < minimum_size)
         {
             minimum_size = size;
@@ -444,8 +449,13 @@ void MergeTreeDataPart::loadIndex()
         if (columns.empty())
             throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
 
-        marks_count = Poco::File(getFullPath() + escapeForFileName(columns.front().name) + ".mrk")
-            .getSize() / MERGE_TREE_MARK_SIZE;
+        if (storage.format_version < MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY)
+            marks_count = Poco::File(getFullPath() + escapeForFileName(columns.front().name) + ".mrk")
+                .getSize() / MERGE_TREE_MARK_SIZE_FIXED_INDEX_GRANULARITY;
+        else
+            marks_count = Poco::File(getFullPath() + escapeForFileName(columns.front().name) + ".mrk2")
+                .getSize() / MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY;
+
     }
 
     size_t key_size = storage.primary_sort_columns.size();
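The branch added to loadIndex() derives marks_count from the size of the first column's marks file, dividing by the 16-byte entry size for .mrk and the 24-byte entry size for .mrk2. (As committed, the condition compares format_version against MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY; the rest of the commit uses MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_ADAPTIVE_INDEX_GRANULARITY for this check.) A rough sketch of the computation, assuming a boolean adaptive-granularity flag and std::filesystem in place of Poco::File:

```cpp
#include <cstdint>
#include <filesystem>
#include <string>

// Mirrors the anonymous-namespace constants added in this commit.
constexpr size_t MERGE_TREE_MARK_SIZE_FIXED_INDEX_GRANULARITY = 2 * sizeof(uint64_t);
constexpr size_t MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY = 3 * sizeof(uint64_t);

// Hypothetical helper: marks_count is the marks file size divided by the
// per-mark entry size for the part's format.
size_t computeMarksCount(const std::string & part_path, const std::string & escaped_column_name, bool adaptive_granularity)
{
    if (!adaptive_granularity)
        return std::filesystem::file_size(part_path + escaped_column_name + ".mrk")
            / MERGE_TREE_MARK_SIZE_FIXED_INDEX_GRANULARITY;

    return std::filesystem::file_size(part_path + escaped_column_name + ".mrk2")
        / MERGE_TREE_MARK_SIZE_ADAPTIVE_INDEX_GRANULARITY;
}
```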
@@ -3,6 +3,7 @@
 #include <Common/escapeForFileName.h>
 #include <Common/MemoryTracker.h>
 #include <IO/CachedCompressedReadBuffer.h>
+#include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
 #include <IO/CompressedReadBufferFromFile.h>
 #include <Columns/ColumnArray.h>
 #include <Interpreters/evaluateMissingDefaults.h>
@@ -162,9 +163,10 @@ MergeTreeReader::Stream::Stream(
     MarkCache * mark_cache_, bool save_marks_in_cache_,
     UncompressedCache * uncompressed_cache,
     size_t aio_threshold, size_t max_read_buffer_size,
-    const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
+    const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type,
+    MergeTreeDataFormatVersion format_version_)
     : path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_)
-    , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_)
+    , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_), format_version(format_version_)
 {
     /// Compute the size of the buffer.
     size_t max_mark_range = 0;
@@ -263,6 +265,8 @@ const MarkInCompressedFile & MergeTreeReader::Stream::getMark(size_t index)
 void MergeTreeReader::Stream::loadMarks()
 {
     std::string path = path_prefix + ".mrk";
+    if (format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_ADAPTIVE_INDEX_GRANULARITY)
+        path += "2";
 
     auto load = [&]() -> MarkCache::MappedPtr
     {
@@ -270,7 +274,11 @@ void MergeTreeReader::Stream::loadMarks()
         auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
 
         size_t file_size = Poco::File(path).getSize();
-        size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count;
+        size_t expected_file_size;
+        if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_ADAPTIVE_INDEX_GRANULARITY)
+            expected_file_size = (sizeof(MarkInCompressedFile) - sizeof(size_t)) * marks_count;
+        else
+            expected_file_size = sizeof(MarkInCompressedFile) * marks_count;
         if (expected_file_size != file_size)
             throw Exception(
                 "bad size of marks file `" + path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),
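With the third field added to MarkInCompressedFile, the expected-size check in loadMarks() becomes format-dependent: a fixed-granularity file holds sizeof(MarkInCompressedFile) - sizeof(size_t) bytes per mark, an adaptive-granularity file holds sizeof(MarkInCompressedFile). A self-contained sketch of that check (hypothetical names; assumes a typical 64-bit build with no struct padding):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>

// Mirrors MarkInCompressedFile after this commit: three size_t fields.
struct Mark
{
    size_t offset_in_compressed_file;
    size_t offset_in_decompressed_block;
    size_t index_granularity;
};

void checkMarksFileSize(const std::string & path, size_t file_size, size_t marks_count, bool adaptive_granularity)
{
    const size_t expected_file_size = adaptive_granularity
        ? sizeof(Mark) * marks_count                       // .mrk2: 24 bytes per mark
        : (sizeof(Mark) - sizeof(size_t)) * marks_count;   // .mrk : 16 bytes per mark

    if (expected_file_size != file_size)
        throw std::runtime_error(
            "bad size of marks file `" + path + "': " + std::to_string(file_size)
            + ", must be: " + std::to_string(expected_file_size));
}
```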
@@ -375,7 +383,7 @@ void MergeTreeReader::addStreams(const String & name, const IDataType & type, co
         streams.emplace(stream_name, std::make_unique<Stream>(
             path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count,
             all_mark_ranges, mark_cache, save_marks_in_cache,
-            uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type));
+            uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type, storage.format_version));
     };
 
     IDataType::SubstreamPath path;
@@ -63,7 +63,8 @@ private:
             MarkCache * mark_cache, bool save_marks_in_cache,
             UncompressedCache * uncompressed_cache,
             size_t aio_threshold, size_t max_read_buffer_size,
-            const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
+            const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type,
+            MergeTreeDataFormatVersion format_version_);
 
         void seekToMark(size_t index);
         void seekToStart();
@@ -87,6 +88,7 @@ private:
         bool save_marks_in_cache;
         MarkCache::MappedPtr marks;
 
+        MergeTreeDataFormatVersion format_version;
         std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
         std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
     };
@@ -119,6 +119,7 @@ void IMergedBlockOutputStream::writeData(
     serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;
     serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part != 0;
 
+    std::cerr << "Index granularity:" << index_granularity << std::endl;
     size_t size = column.size();
     size_t prev_mark = 0;
     while (prev_mark < size)
@@ -153,8 +154,11 @@ void IMergedBlockOutputStream::writeData(
 
             writeIntBinary(stream.plain_hashing.count(), stream.marks);
             writeIntBinary(stream.compressed.offset(), stream.marks);
-            if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_ADAPTIVE_INDEX_GRANULARITY)
+            std::cerr << "Format version:" << storage.format_version << std::endl;
+            if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_ADAPTIVE_INDEX_GRANULARITY) {
+                std::cerr << "Writing index granularity:" << index_granularity << std::endl;
                 writeIntBinary(index_granularity, stream.marks);
+            }
         }, serialize_settings.path);
     }
 
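Per mark, the writer now emits the two offsets as before and, for adaptive-granularity parts, the index granularity as a third value. A minimal sketch of the bytes appended per mark, using std::ofstream in place of WriteBuffer/writeIntBinary (which write the raw integer representation; the layouts match on a little-endian host):

```cpp
#include <cstdint>
#include <fstream>

// Hypothetical equivalent of the per-mark writes in writeData(): two UInt64
// offsets always, plus index_granularity only for the adaptive format.
void writeMark(std::ofstream & marks,
               uint64_t offset_in_compressed_file,
               uint64_t offset_in_decompressed_block,
               uint64_t index_granularity,
               bool adaptive_granularity)
{
    marks.write(reinterpret_cast<const char *>(&offset_in_compressed_file), sizeof(offset_in_compressed_file));
    marks.write(reinterpret_cast<const char *>(&offset_in_decompressed_block), sizeof(offset_in_decompressed_block));
    if (adaptive_granularity)
        marks.write(reinterpret_cast<const char *>(&index_granularity), sizeof(index_granularity));
}
```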
@@ -27,3 +27,6 @@ target_link_libraries (get_current_inserts_in_replicated dbms)
 
 add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp)
 target_link_libraries (get_abandonable_lock_in_all_partitions dbms)
+
+add_executable (marks_file_reader marks_file_reader.cpp)
+target_link_libraries (marks_file_reader dbms)
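The contents of marks_file_reader.cpp are not part of this diff; as a rough idea of what such a debugging tool could do, here is a hypothetical standalone reader that dumps the (offset_in_compressed_file, offset_in_decompressed_block, index_granularity) triples of a .mrk2 file:

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>

int main(int argc, char ** argv)
{
    if (argc != 2)
    {
        std::cerr << "Usage: marks_file_reader <path-to-.mrk2>\n";
        return 1;
    }

    std::ifstream in(argv[1], std::ios::binary);
    uint64_t mark[3];   // offset_in_compressed_file, offset_in_decompressed_block, index_granularity
    for (size_t i = 0; in.read(reinterpret_cast<char *>(mark), sizeof(mark)); ++i)
        std::cout << "mark " << i << ": (" << mark[0] << ", " << mark[1] << ", " << mark[2] << ")\n";

    return 0;
}
```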