ClickHouse/dbms/src/Storages/MergeTree/MergeTreeReaderCompact.cpp

#include <Storages/MergeTree/MergeTreeReaderCompact.h>
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/NestedUtils.h>
#include <Poco/File.h>

namespace DB
{

/// Error codes referenced in this translation unit. Only declarations live here;
/// the values themselves are defined elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_FOUND_EXPECTED_DATA_PART;
    extern const int MEMORY_LIMIT_EXCEEDED;
    extern const int ARGUMENT_OUT_OF_BOUND;
    /// Used when a column or the marks file yields less data than expected.
    extern const int CANNOT_READ_ALL_DATA;
    /// Used when the size of the marks file does not match the expected size.
    extern const int CORRUPTED_DATA;
}

/// Prepares a reader for a part stored in compact format:
///  - initializes the marks loader;
///  - opens the data file, through the uncompressed cache if one was passed, directly otherwise;
///  - resolves the position inside the part of every requested column.
MergeTreeReaderCompact::MergeTreeReaderCompact(const MergeTreeData::DataPartPtr & data_part_,
    const NamesAndTypesList & columns_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_,
    const MarkRanges & mark_ranges_, const MergeTreeReaderSettings & settings_, const ValueSizeMap & avg_value_size_hints_)
    : IMergeTreeReader(data_part_, columns_
    , uncompressed_cache_, mark_cache_, mark_ranges_
    , settings_, avg_value_size_hints_)
{
    initMarksLoader();

    const size_t read_buffer_size = settings.max_read_buffer_size;
    const String data_file_path = path + MergeTreeDataPartCompact::DATA_FILE_NAME + MergeTreeDataPartCompact::DATA_FILE_EXTENSION;

    if (!uncompressed_cache)
    {
        /// No cache available: read and decompress straight from the file.
        non_cached_buffer = std::make_unique<CompressedReadBufferFromFile>(
            data_file_path, 0, settings.min_bytes_to_use_direct_io, read_buffer_size);

        // if (profile_callback)
        //     non_cached_buffer->setProfileCallback(profile_callback, clock_type);

        data_buffer = non_cached_buffer.get();
    }
    else
    {
        cached_buffer = std::make_unique<CachedCompressedReadBuffer>(
            data_file_path, uncompressed_cache, 0, settings.min_bytes_to_use_direct_io, read_buffer_size);

        // if (profile_callback)
        //     cached_buffer->setProfileCallback(profile_callback, clock_type);

        data_buffer = cached_buffer.get();
    }

    const size_t requested_columns = columns.size();
    column_positions.resize(requested_columns);
    read_only_offsets.resize(requested_columns);

    size_t idx = 0;
    for (const auto & requested : columns)
    {
        auto position_in_part = data_part->getColumnPosition(requested.name);

        /// If an array of a Nested column is missing in the part,
        ///  we have to read its offsets if they exist.
        const bool is_missing_array = !position_in_part && typeid_cast<const DataTypeArray *>(requested.type.get());
        if (is_missing_array)
        {
            position_in_part = findColumnForOffsets(requested.name);
            read_only_offsets[idx] = (position_in_part != std::nullopt);
        }

        column_positions[idx] = std::move(position_in_part);
        ++idx;
    }
}

size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
{
    /// FIXME compute correct granularity

    if (continue_reading)
        from_mark = next_mark;

    size_t read_rows = 0;
    size_t num_columns = columns.size();

    while (read_rows < max_rows_to_read)
    {
        size_t rows_to_read = data_part->index_granularity.getMarkRows(from_mark);

        auto name_and_type = columns.begin();
        for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
        {
            auto & [name, type] = *name_and_type;

            if (!column_positions[pos])
                continue;

            bool append = res_columns[pos] != nullptr;
            if (!append)
                res_columns[pos] = name_and_type->type->createColumn();

            /// To keep offsets shared. TODO Very dangerous. Get rid of this.
            MutableColumnPtr column = res_columns[pos]->assumeMutable();

            try
            {
                size_t column_size_before_reading = column->size();

                readData(*column, *type, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);

                size_t read_rows_in_column = column->size() - column_size_before_reading;
                if (read_rows_in_column < rows_to_read)
                    throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +
                        ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);

                /// For elements of Nested, column_size_before_reading may be greater than column size
                ///  if offsets are not empty and were already read, but elements are empty.
                /// FIXME
                // if (column->size())
                //     read_rows_in_mark = std::max(read_rows, column->size() - column_size_before_reading);
            }
            catch (Exception & e)
            {
                /// Better diagnostics.
                e.addMessage("(while reading column " + name + ")");
                throw;
            }

            if (column->size())
                res_columns[pos] = std::move(column);
            else
                res_columns[pos] = nullptr;
        }

        ++from_mark;
        read_rows += rows_to_read;
    }

    next_mark = from_mark;

    return read_rows;
}

/// Searches the columns of the part for an Array column whose table name
/// (as returned by Nested::extractTableName) equals that of `column_name`
/// and which has a position in the part.
/// Returns the position of the first such column, or an empty value if there is none.
MergeTreeReaderCompact::ColumnPosition MergeTreeReaderCompact::findColumnForOffsets(const String & column_name)
{
    const String wanted_table = Nested::extractTableName(column_name);

    for (const auto & candidate : data_part->columns)
    {
        /// Only arrays have offsets.
        if (!typeid_cast<const DataTypeArray *>(candidate.type.get()))
            continue;

        auto candidate_position = data_part->getColumnPosition(candidate.name);
        if (!candidate_position)
            continue;

        if (Nested::extractTableName(candidate.name) == wanted_table)
            return candidate_position;
    }

    return {};
}


/// Deserializes `rows_to_read` rows of the column located at `column_position`
/// of granule `from_mark` into `column`. Seeks to the corresponding mark first,
/// unless the read directly continues the previous one.
/// With `only_offsets` set, only the array sizes substream is given a buffer to read from.
void MergeTreeReaderCompact::readData(
    IColumn & column, const IDataType & type,
    size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets)
{
    const bool need_seek = !isContinuousReading(from_mark, column_position);
    if (need_seek)
        seekToMark(from_mark, column_position);

    auto choose_buffer = [&](const IDataType::SubstreamPath & substream_path) -> ReadBuffer *
    {
        if (!only_offsets)
            return data_buffer;

        /// In offsets-only mode every substream except the top-level array sizes is skipped.
        const bool is_array_sizes = substream_path.size() == 1
            && substream_path[0].type == IDataType::Substream::ArraySizes;
        if (is_array_sizes)
            return data_buffer;

        return nullptr;
    };

    IDataType::DeserializeBinaryBulkSettings deserialize_settings;
    deserialize_settings.position_independent_encoding = true;
    // deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
    deserialize_settings.getter = choose_buffer;

    IDataType::DeserializeBinaryBulkStatePtr state;
    type.deserializeBinaryBulkStatePrefix(deserialize_settings, state);
    type.deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state);

    /// The buffer is left in an inconsistent state after reading only the offsets,
    ///  so the next read must not be treated as continuous.
    if (!only_offsets)
        last_read_granule.emplace(from_mark, column_position);
    else
        last_read_granule.reset();
}


/// Sets up `marks_loader` for the marks file of the part, unless it is already initialized.
/// The loader is given a callback that reads the whole marks file into memory and
/// validates its size both before and after reading.
void MergeTreeReaderCompact::initMarksLoader()
{
    if (marks_loader.initialized())
        return;

    size_t columns_num = data_part->columns.size();

    /// Reads every mark row from the file at `marks_file_path`.
    /// Throws CORRUPTED_DATA if the file size differs from the expected one and
    /// CANNOT_READ_ALL_DATA if the number of rows read does not cover the whole file.
    auto load_marks = [this, columns_num](const String & marks_file_path) -> MarkCache::MappedPtr
    {
        size_t actual_size = Poco::File(marks_file_path).getSize();
        size_t marks_count = data_part->getMarksCount();
        size_t bytes_per_mark = data_part->index_granularity_info.getMarkSizeInBytes(columns_num);
        size_t expected_size = bytes_per_mark * marks_count;

        if (actual_size != expected_size)
            throw Exception(
                "Bad size of marks file '" + marks_file_path + "': " + std::to_string(actual_size) + ", must be: " + std::to_string(expected_size),
                ErrorCodes::CORRUPTED_DATA);

        /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
        auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();

        auto marks = std::make_shared<MarksInCompressedFile>(marks_count * columns_num);

        ReadBufferFromFile marks_file(marks_file_path, actual_size);

        size_t rows_loaded = 0;
        for (; !marks_file.eof(); ++rows_loaded)
        {
            /// One row of the file holds a mark per column...
            char * row_destination = reinterpret_cast<char *>(marks->data() + rows_loaded * columns_num);
            marks_file.readStrict(row_destination, sizeof(MarkInCompressedFile) * columns_num);

            /// ...followed by one size_t-sized field that is not needed here and is skipped.
            marks_file.seek(sizeof(size_t), SEEK_CUR);
        }

        if (rows_loaded * bytes_per_mark != actual_size)
            throw Exception("Cannot read all marks from file " + marks_file_path, ErrorCodes::CANNOT_READ_ALL_DATA);

        marks->protect();
        return marks;
    };

    auto marks_path = data_part->index_granularity_info.getMarksFilePath(path + MergeTreeDataPartCompact::DATA_FILE_NAME);
    marks_loader = MergeTreeMarksLoader{mark_cache, std::move(marks_path), load_marks, settings.save_marks_in_cache, columns_num};
}

/// Positions the data buffer at the mark of the column `column_index` in the granule `row_index`.
/// The mark is obtained from the marks loader; whichever of the cached / non-cached
/// buffers exists is moved to the offsets stored in that mark.
/// Exceptions are rethrown; those with code ARGUMENT_OUT_OF_BOUND additionally get
/// a message naming the mark that was being sought.
void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index)
{
    MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index);
    try
    {
        if (cached_buffer)
            cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
        if (non_cached_buffer)
            non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
    }
    catch (Exception & e)
    {
        /// Better diagnostics. Both the inner and the outer parenthesis are closed,
        ///  so the appended message reads "(while seeking to mark (row, column))".
        if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
            e.addMessage("(while seeking to mark (" + toString(row_index) + ", " + toString(column_index) + "))");

        throw;
    }
}


/// Tells whether reading (`mark`, `column_position`) directly follows the previously
/// read granule, in which case no seek is needed before the read.
/// Returns false if no previous granule is remembered.
bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_position)
{
    if (!last_read_granule)
        return false;

    const auto & [prev_mark, prev_column] = *last_read_granule;

    /// The next column of the same mark.
    if (mark == prev_mark && column_position == prev_column + 1)
        return true;

    /// The first column of the next mark, right after the last column of the part.
    return mark == prev_mark + 1
        && column_position == 0
        && prev_column == data_part->columns.size() - 1;
}

}
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`#include <Storages/MergeTree/MergeTreeReaderCompact.h>`
polymorphic parts (development) alter 2019-12-16 14:51:19 +00:00			`#include <Storages/MergeTree/MergeTreeDataPartCompact.h>`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`#include <DataTypes/DataTypeArray.h>`
			`#include <DataTypes/NestedUtils.h>`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`#include <Poco/File.h>`

			`namespace DB`
			`{`

polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`namespace ErrorCodes`
			`{`
			`extern const int LOGICAL_ERROR;`
			`extern const int NOT_FOUND_EXPECTED_DATA_PART;`
			`extern const int MEMORY_LIMIT_EXCEEDED;`
			`extern const int ARGUMENT_OUT_OF_BOUND;`
			`}`

polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`MergeTreeReaderCompact::MergeTreeReaderCompact(const MergeTreeData::DataPartPtr & data_part_,`
			`const NamesAndTypesList & columns_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_,`
polymorphic parts (development) cleanup 2019-12-18 15:54:45 +00:00			`const MarkRanges & mark_ranges_, const MergeTreeReaderSettings & settings_, const ValueSizeMap & avg_value_size_hints_)`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`: IMergeTreeReader(data_part_, columns_`
			`, uncompressed_cache_, mark_cache_, mark_ranges_`
			`, settings_, avg_value_size_hints_)`
			`{`
polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`initMarksLoader();`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`size_t buffer_size = settings.max_read_buffer_size;`
polymorphic parts (development) alter 2019-12-16 14:51:19 +00:00			`const String full_data_path = path + MergeTreeDataPartCompact::DATA_FILE_NAME + MergeTreeDataPartCompact::DATA_FILE_EXTENSION;`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`if (uncompressed_cache)`
			`{`
			`auto buffer = std::make_unique<CachedCompressedReadBuffer>(`
polymorphic parts (development) alter 2019-12-16 14:51:19 +00:00			`full_data_path, uncompressed_cache, 0, settings.min_bytes_to_use_direct_io, buffer_size);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`// if (profile_callback)`
			`// buffer->setProfileCallback(profile_callback, clock_type);`

			`cached_buffer = std::move(buffer);`
			`data_buffer = cached_buffer.get();`
			`}`
			`else`
			`{`
			`auto buffer = std::make_unique<CompressedReadBufferFromFile>(`
polymorphic parts (development) alter 2019-12-16 14:51:19 +00:00			`full_data_path, 0, settings.min_bytes_to_use_direct_io, buffer_size);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`// if (profile_callback)`
			`// buffer->setProfileCallback(profile_callback, clock_type);`

			`non_cached_buffer = std::move(buffer);`
			`data_buffer = non_cached_buffer.get();`
			`}`
polymorphic parts (development) alter 2019-12-12 18:55:19 +00:00
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`size_t columns_num = columns.size();`

			`column_positions.resize(columns_num);`
			`read_only_offsets.resize(columns_num);`
			`auto name_and_type = columns.begin();`
			`for (size_t i = 0; i < columns_num; ++i, ++name_and_type)`
			`{`
			`const auto & [name, type] = *name_and_type;`
			`auto position = data_part->getColumnPosition(name);`
fix reading of nested columns in compact format 2020-01-09 17:27:44 +00:00
			`/// If array of Nested column is missing in part,`
			`/// we have to read it's offsets if they exists.`
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`if (!position && typeid_cast<const DataTypeArray *>(type.get()))`
			`{`
			`position = findColumnForOffsets(name);`
			`read_only_offsets[i] = (position != std::nullopt);`
			`}`

			`column_positions[i] = std::move(position);`
			`}`

polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`}`

merging with master 2019-12-19 13:10:57 +00:00			`size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`{`
polymorphic parts (development) 2019-11-25 11:06:59 +00:00			`/// FIXME compute correct granularity`
polymorphic parts (development) 2019-11-28 20:14:41 +00:00
			`if (continue_reading)`
			`from_mark = next_mark;`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
polymorphic parts (development) 2019-11-28 20:14:41 +00:00			`size_t read_rows = 0;`
polymorphic parts (development) alter 2019-12-12 18:55:19 +00:00			`size_t num_columns = columns.size();`

polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`while (read_rows < max_rows_to_read)`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`{`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`size_t rows_to_read = data_part->index_granularity.getMarkRows(from_mark);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
merging with master 2019-12-19 13:10:57 +00:00			`auto name_and_type = columns.begin();`
			`for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`{`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`auto & [name, type] = *name_and_type;`

fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`if (!column_positions[pos])`
polymorphic parts (development) alter 2019-12-12 18:55:19 +00:00			`continue;`
fix broken by refactoring functionality with wide parts 2019-12-19 14:05:26 +00:00
merging with master 2019-12-19 13:10:57 +00:00			`bool append = res_columns[pos] != nullptr;`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`if (!append)`
merging with master 2019-12-19 13:10:57 +00:00			`res_columns[pos] = name_and_type->type->createColumn();`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00
			`/// To keep offsets shared. TODO Very dangerous. Get rid of this.`
merging with master 2019-12-19 13:10:57 +00:00			`MutableColumnPtr column = res_columns[pos]->assumeMutable();`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00
			`try`
			`{`
polymorphic parts (development) 2019-11-28 20:14:41 +00:00			`size_t column_size_before_reading = column->size();`
polymorphic parts (development) alter 2019-12-12 18:55:19 +00:00
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`readData(*column, *type, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00
			`size_t read_rows_in_column = column->size() - column_size_before_reading;`
polymorphic parts (development) 2019-11-28 20:14:41 +00:00			`if (read_rows_in_column < rows_to_read)`
polymorphic parts (development) cleanup 2019-12-18 16:41:11 +00:00			`throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +`
polymorphic parts (development) alter 2019-12-12 18:55:19 +00:00			`". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);`
polymorphic parts (development) 2019-11-28 20:14:41 +00:00
polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`/// For elements of Nested, column_size_before_reading may be greater than column size`
			`/// if offsets are not empty and were already read, but elements are empty.`
			`/// FIXME`
			`// if (column->size())`
			`// read_rows_in_mark = std::max(read_rows, column->size() - column_size_before_reading);`
			`}`
			`catch (Exception & e)`
			`{`
			`/// Better diagnostics.`
merging with master 2019-12-19 13:10:57 +00:00			`e.addMessage("(while reading column " + name + ")");`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`throw;`
			`}`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`if (column->size())`
merging with master 2019-12-19 13:10:57 +00:00			`res_columns[pos] = std::move(column);`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`else`
merging with master 2019-12-19 13:10:57 +00:00			`res_columns[pos] = nullptr;`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`}`

polymorphic parts (development) 2019-11-27 11:35:27 +00:00			`++from_mark;`
			`read_rows += rows_to_read;`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`}`

polymorphic parts (development) 2019-11-28 20:14:41 +00:00			`next_mark = from_mark;`
polymorphic parts (development) 2019-11-27 11:35:27 +00:00
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`return read_rows;`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`}`

fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`MergeTreeReaderCompact::ColumnPosition MergeTreeReaderCompact::findColumnForOffsets(const String & column_name)`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`{`
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`String table_name = Nested::extractTableName(column_name);`
			`for (const auto & part_column : data_part->columns)`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`{`
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`if (typeid_cast<const DataTypeArray *>(part_column.type.get()))`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`{`
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`auto position = data_part->getColumnPosition(part_column.name);`
			`if (position && Nested::extractTableName(part_column.name) == table_name)`
			`return position;`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`}`
			`}`

fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`return {};`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`}`

polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`void MergeTreeReaderCompact::readData(`
reduce number of seeks in ReaderCompact 2019-12-25 17:34:23 +00:00			`IColumn & column, const IDataType & type,`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets)`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`{`
reduce number of seeks in ReaderCompact 2019-12-25 17:34:23 +00:00			`if (!isContinuousReading(from_mark, column_position))`
			`seekToMark(from_mark, column_position);`
polymorphic parts (development) cleanup 2019-12-18 16:41:11 +00:00
fix reading of nested columns in compact format 2020-01-09 17:06:34 +00:00			`auto buffer_getter = [&](const IDataType::SubstreamPath & substream_path) -> ReadBuffer *`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`{`
			`if (only_offsets && (substream_path.size() != 1 \|\| substream_path[0].type != IDataType::Substream::ArraySizes))`
			`return nullptr;`

			`return data_buffer;`
			`};`

polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`IDataType::DeserializeBinaryBulkSettings deserialize_settings;`
fix reading of nested columns in compact format 2020-01-09 15:46:32 +00:00			`deserialize_settings.getter = buffer_getter;`
polymorphic parts (development) 2019-11-28 20:14:41 +00:00			`// deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];`
polymorphic parts (development) 2019-12-02 15:21:07 +00:00			`deserialize_settings.position_independent_encoding = true;`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`IDataType::DeserializeBinaryBulkStatePtr state;`
			`type.deserializeBinaryBulkStatePrefix(deserialize_settings, state);`
			`type.deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state);`
reduce number of seeks in ReaderCompact 2019-12-25 17:34:23 +00:00
fix reading of nested columns in compact format 2020-01-09 17:27:44 +00:00			`/// The buffer is left in inconsistent state after reading single offsets`
			`if (only_offsets)`
			`last_read_granule.reset();`
			`else`
			`last_read_granule.emplace(from_mark, column_position);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`}`


polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`void MergeTreeReaderCompact::initMarksLoader()`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`{`
polymorphic parts (development) 2019-11-21 16:10:22 +00:00			`if (marks_loader.initialized())`
			`return;`

polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`size_t columns_num = data_part->columns.size();`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00
polymorphic parts (development) 2019-11-25 20:19:43 +00:00			`auto load = [this, columns_num](const String & mrk_path) -> MarkCache::MappedPtr`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`{`
			`size_t file_size = Poco::File(mrk_path).getSize();`
polymorphic parts (development) 2019-11-25 11:06:59 +00:00			`size_t marks_count = data_part->getMarksCount();`
refactor code near MergeTreeDataPart 2020-01-14 13:23:51 +00:00			`size_t mark_size_in_bytes = data_part->index_granularity_info.getMarkSizeInBytes(columns_num);`
polymorphic parts (development) 2019-10-31 14:44:17 +00:00
polymorphic parts (development) 2019-11-25 11:06:59 +00:00			`size_t expected_file_size = mark_size_in_bytes * marks_count;`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`if (expected_file_size != file_size)`
			`throw Exception(`
			`"Bad size of marks file '" + mrk_path + "': " + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),`
			`ErrorCodes::CORRUPTED_DATA);`

			`/// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.`
			`auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();`

polymorphic parts (development) 2019-10-31 14:44:17 +00:00			`auto res = std::make_shared<MarksInCompressedFile>(marks_count * columns_num);`

polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`ReadBufferFromFile buffer(mrk_path, file_size);`
			`size_t i = 0;`

			`while (!buffer.eof())`
			`{`
polymorphic parts (development) 2019-10-31 14:44:17 +00:00			`buffer.readStrict(reinterpret_cast<char *>(res->data() + i * columns_num), sizeof(MarkInCompressedFile) * columns_num);`
better writer for compact parts 2019-12-27 21:17:53 +00:00			`buffer.seek(sizeof(size_t), SEEK_CUR);`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`++i;`
			`}`

polymorphic parts (development) 2019-11-25 11:06:59 +00:00			`if (i * mark_size_in_bytes != file_size)`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);`

			`res->protect();`
			`return res;`
			`};`

polymorphic parts (development) alter 2019-12-16 14:51:19 +00:00			`auto mrk_path = data_part->index_granularity_info.getMarksFilePath(path + MergeTreeDataPartCompact::DATA_FILE_NAME);`
polymorphic parts (development) 2019-11-25 20:19:43 +00:00			`marks_loader = MergeTreeMarksLoader{mark_cache, std::move(mrk_path), load, settings.save_marks_in_cache, columns_num};`
polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`}`

polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index)`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`{`
polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`try`
			`{`
			`if (cached_buffer)`
			`cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);`
			`if (non_cached_buffer)`
			`non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);`
			`}`
			`catch (Exception & e)`
			`{`
			`/// Better diagnostics.`
			`if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)`
polymorphic parts (development) 2019-11-20 13:33:41 +00:00			`e.addMessage("(while seeking to mark (" + toString(row_index) + ", " + toString(column_index) + ")");`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00
			`throw;`
			`}`
			`}`


reduce number of seeks in ReaderCompact 2019-12-25 17:34:23 +00:00			`bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_position)`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`{`
reduce number of seeks in ReaderCompact 2019-12-25 17:34:23 +00:00			`if (!last_read_granule)`
			`return false;`
			`const auto & [last_mark, last_column] = *last_read_granule;`
			`return (mark == last_mark && column_position == last_column + 1)`
			`\|\| (mark == last_mark + 1 && column_position == 0 && last_column == data_part->columns.size() - 1);`
polymorphic parts (development) 2019-10-16 18:27:53 +00:00			`}`

polymorphic parts (development) 2019-10-11 15:37:16 +00:00			`}`