ClickHouse/dbms/Storages/MergeTree/MergeTreeDataPartCompact.h

74 lines
2.8 KiB
C++
Raw Normal View History

2019-10-10 16:30:30 +00:00
#pragma once
#include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
{
2020-02-19 14:07:36 +00:00
/** In compact format all columns are stored in one file (`data.bin`).
* Data is split into granules, and within each granule the columns are serialized sequentially.
* Granules are written one by one in data file.
* Marks are also stored in single file (`data.mrk3`).
* In compact format one mark is an array of marks for every column and a number of rows in granule.
* Format of other data part files is not changed.
* Only small parts (up to 10M) are intended to be stored in compact format.
* NOTE: Compact parts aren't supported for tables with non-adaptive granularity.
* NOTE: In compact part compressed and uncompressed size of single column is unknown.
*/
2019-10-10 16:30:30 +00:00
/// Data part implementation for the compact on-disk format.
/// Derives from IMergeTreeDataPart and overrides its reader/writer factories,
/// consistency check, index granularity loading and column size calculation.
class MergeTreeDataPartCompact : public IMergeTreeDataPart
{
public:
2019-12-16 14:51:19 +00:00
/// Base name (without extension) of the data file.
static constexpr auto DATA_FILE_NAME = "data";
/// Extension of the data file.
static constexpr auto DATA_FILE_EXTENSION = ".bin";
2020-02-06 15:32:00 +00:00
/// Suffix for temporary file names; where it is applied is not visible in this header.
static constexpr auto TEMP_FILE_SUFFIX = "_temp";
2020-01-13 14:53:32 +00:00
/// Full data file name. NOTE: this literal equals DATA_FILE_NAME + DATA_FILE_EXTENSION
/// but is written out separately, so the three constants must be kept in sync by hand.
static constexpr auto DATA_FILE_NAME_WITH_EXTENSION = "data.bin";
2019-12-16 14:51:19 +00:00
2019-10-16 18:27:53 +00:00
/// Constructs a part from an explicitly supplied part info.
/// `relative_path_` is optional and defaults to an empty optional.
MergeTreeDataPartCompact(
2019-10-10 16:30:30 +00:00
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
2019-12-19 13:10:57 +00:00
const DiskPtr & disk_,
2019-10-16 18:27:53 +00:00
const std::optional<String> & relative_path_ = {});
2019-10-10 16:30:30 +00:00
/// Constructs a part without an explicit part info.
/// NOTE(review): presumably the part info is derived from `name_` — confirm in the .cpp.
/// Unlike the overload above, `storage_` is taken by non-const reference here.
MergeTreeDataPartCompact(
2019-10-10 16:30:30 +00:00
MergeTreeData & storage_,
const String & name_,
2019-12-19 13:10:57 +00:00
const DiskPtr & disk_,
2019-10-16 18:27:53 +00:00
const std::optional<String> & relative_path_ = {});
2019-10-10 16:30:30 +00:00
/// Returns a reader for this part, given the columns and mark ranges to read,
/// optional caches (passed as raw pointers), reader settings, average value size
/// hints and a profile callback. The implementation lives outside this header.
MergeTreeReaderPtr getReader(
const NamesAndTypesList & columns,
const MarkRanges & mark_ranges,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const MergeTreeReaderSettings & reader_settings_,
2020-03-18 03:27:32 +00:00
const ValueSizeMap & avg_value_size_hints,
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
2019-10-21 15:33:59 +00:00
/// Returns a writer for this part, given the column list, the indices to recalculate,
/// the default compression codec, writer settings and a precomputed index granularity.
/// The implementation lives outside this header.
MergeTreeWriterPtr getWriter(
const NamesAndTypesList & columns_list,
2019-11-07 11:11:38 +00:00
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
2020-03-18 03:27:32 +00:00
const MergeTreeIndexGranularity & computed_index_granularity) const override;
2019-10-10 16:30:30 +00:00
/// Always true for this part type.
bool isStoredOnDisk() const override { return true; }
2019-12-12 18:55:19 +00:00
/// Reports whether the part has files for the given column.
/// NOTE(review): exact criteria are defined in the .cpp — not visible here.
bool hasColumnFiles(const String & column_name, const IDataType & type) const override;
2019-12-19 13:10:57 +00:00
/// Returns DATA_FILE_NAME for every column; the column argument is intentionally unused.
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; }
2019-11-18 12:22:27 +00:00
/// Destructor is defined out of line (in the .cpp).
~MergeTreeDataPartCompact() override;
2019-10-10 16:30:30 +00:00
private:
2020-01-16 16:15:01 +00:00
/// Checks consistency of the part.
/// NOTE(review): the effect of `require_part_metadata` is defined in the .cpp — confirm there.
void checkConsistency(bool require_part_metadata) const override;
2019-10-10 16:30:30 +00:00
/// Loads marks index granularity into memory
2019-10-31 14:44:17 +00:00
void loadIndexGranularity() override;
2020-03-23 12:19:43 +00:00
/// Compact parts don't support per-column sizes, only the total size.
/// Both `each_columns_size` and `total_size` are output parameters (non-const references).
void calculateEachColumnSizesOnDisk(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override;
2019-10-10 16:30:30 +00:00
};
}