ClickHouse/dbms/src/Storages/MergeTree/IMergeTreeDataPart.h

364 lines
14 KiB
C++
Raw Normal View History

2016-02-14 04:58:47 +00:00
#pragma once
2019-10-10 16:30:30 +00:00
#include <DataStreams/IBlockInputStream.h>
#include <Core/Row.h>
#include <Core/Block.h>
#include <Core/Types.h>
#include <Core/NamesAndTypes.h>
2019-10-10 16:30:30 +00:00
#include <Storages/IStorage.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
2019-06-19 10:07:56 +00:00
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
Data Skipping Indices (#4143) * made index parser * added index parsing * some fixes * added index interface and factory * fixed compilation * ptrs * added indexParts * indextypes * index condition * IndexCondition * added indexes in selectexecutor * fix * changed comment * fix * added granularity * comments * fix * fix * added writing indexes * removed indexpart class * fix * added setSkipIndexes * add rw for MergeTreeIndexes * fixes * upd error * fix * fix * reading * test index * fixed nullptr error * fixed * fix * unique names * asts -> exprlist * minmax index * fix * fixed select * fixed merging * fixed mutation * working minmax * removed test index * fixed style * added indexes to checkDataPart * added tests for minmax index * fixed constructor * fix style * fixed includes * fixed setSkipIndexes * added indexes meta to zookeeper * added parsing * removed throw * alter cmds parse * fix * added alter * fix * alters fix * fix alters * fix "after" * fixed alter * alter fix + test * fixes * upd setSkipIndexes * fixed alter bug with drop all indices * fix metadata editing * new test and repl fix * rm test files * fixed repl alter * fix * fix * indices * MTReadStream * upd test for bug * fix * added useful parsers and ast classes * fix * fix comments * replaced columns * fix * fixed parsing * fixed printing * fix err * basic IndicesDescription * go to IndicesDescr * moved indices * go to indicesDescr * fix test minmax_index* * fixed MT alter * fixed bug with replMT indices storing in zk * rename * refactoring * docs ru * docs ru * docs en * refactor * rename tests * fix docs * refactoring * fix * fix * fix * fixed style * unique idx * unique * fix * better minmax calculation * upd * added getBlock * unique_condition * added termForAST * unique * fixed not * uniqueCondition::mayBeTrueOnGranule * fix * fixed bug with double column * is always true * fix * key set * spaces * test * tests * fix * unique * fix * fix * fixed bug with duplicate column * removed unused data 
* fix * fixes * __bitSwapLastTwo * fix
2019-02-05 14:50:25 +00:00
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreePartition.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
#include <Storages/MergeTree/MergeTreeIOSettings.h>
2019-12-16 14:51:19 +00:00
#include <Storages/MergeTree/AlterAnalysisResult.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Columns/IColumn.h>
#include <Poco/Path.h>
#include <shared_mutex>
2016-02-14 04:58:47 +00:00
namespace DB
{
2019-10-21 15:33:59 +00:00
struct ColumnSize;
class MergeTreeData;
class IMergeTreeReader;
2019-10-21 17:23:06 +00:00
class IMergeTreeDataPartWriter;
2019-10-10 16:30:30 +00:00
namespace ErrorCodes
{
    /// Declaration only; the definition lives outside this header.
    /// The identifier used to be misspelled as NOT_IMPLEMETED, a name that
    /// could never be resolved against the project's NOT_IMPLEMENTED error code.
    extern const int NOT_IMPLEMENTED;
}
class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPart>
2016-02-14 04:58:47 +00:00
{
2019-10-10 16:30:30 +00:00
public:
using Checksums = MergeTreeDataPartChecksums;
using Checksum = MergeTreeDataPartChecksums::Checksum;
2019-10-10 16:30:30 +00:00
using ValueSizeMap = std::map<std::string, double>;
2016-02-14 04:58:47 +00:00
2019-10-21 15:33:59 +00:00
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
2019-10-21 17:23:06 +00:00
using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
2019-10-21 15:33:59 +00:00
using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
2019-10-10 16:30:30 +00:00
virtual MergeTreeReaderPtr getReader(
const NamesAndTypesList & columns_,
const MarkRanges & mark_ranges,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const MergeTreeReaderSettings & reader_settings_,
2019-10-10 16:30:30 +00:00
const ValueSizeMap & avg_value_size_hints_ = ValueSizeMap{},
const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback{}) const = 0;
2019-10-20 23:36:27 +00:00
virtual MergeTreeWriterPtr getWriter(
const NamesAndTypesList & columns_list,
2019-11-05 11:53:22 +00:00
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
2019-10-20 23:36:27 +00:00
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
2019-11-05 11:53:22 +00:00
const MergeTreeIndexGranularity & computed_index_granularity = {}) const = 0;
2019-10-20 23:36:27 +00:00
2019-10-10 16:30:30 +00:00
virtual bool isStoredOnDisk() const = 0;
/// Base implementation reports that vertical merge is not supported.
virtual bool supportsVerticalMerge() const
{
    return false;
}
2016-02-14 04:58:47 +00:00
/// Size information for a single column of the part.
/// NOTE: Returns zeros if column files are not found in checksums.
/// NOTE: You must ensure that no ALTERs are in progress when calculating ColumnSizes
///  (either by locking columns_lock, or by locking table structure).
/// The base implementation ignores both arguments and returns a value-initialized ColumnSize.
virtual ColumnSize getColumnSize(const String & /* name */, const IDataType & /* type */) const
{
    return {};
}
/// Size information for all columns of the part.
/// The base implementation returns a value-initialized ColumnSize.
virtual ColumnSize getTotalColumnsSize() const
{
    return {};
}
2019-10-10 16:30:30 +00:00
/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
/// If no checksums are present returns the name of the first physically existing column.
2019-11-21 16:10:22 +00:00
/// Base implementation: returns the name of the first entry of `columns`.
/// NOTE(review): assumes `columns` is non-empty when this is called — confirm against callers.
virtual String getColumnNameWithMinumumCompressedSize() const
{
    return columns.front().name;
}
2019-10-11 15:37:16 +00:00
2019-12-16 14:51:19 +00:00
/// Base implementation ignores both arguments and returns an empty map.
virtual NameToNameMap createRenameMapForAlter(
    AlterAnalysisResult & /* analysis_result */,
    const NamesAndTypesList & /* old_columns */) const
{
    return NameToNameMap{};
}
virtual ~IMergeTreeDataPart();
2019-10-10 16:30:30 +00:00
// virtual Checksums check(
// bool require_checksums,
// const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array.
// const MergeTreeIndices & indices = {}, /// Check skip indices
// std::function<bool()> is_cancelled = []{ return false; })
// {
// return {};
// }
using ColumnToSize = std::map<std::string, UInt64>;
2019-11-18 15:18:50 +00:00
virtual void accumulateColumnSizes(ColumnToSize & column_to_size) const;
2019-10-10 16:30:30 +00:00
2019-11-21 16:10:22 +00:00
using Type = MergeTreeDataPartType;
2019-10-10 16:30:30 +00:00
virtual Type getType() const = 0;
2019-10-31 14:44:17 +00:00
static String typeToString(Type type);
2019-10-10 16:30:30 +00:00
/// Returns the textual name of this part's type, i.e. typeToString(getType()).
/// Const-qualified because it only calls a const virtual and a static function;
/// without it the method is not callable through pointers/references to const parts.
String getTypeName() const { return typeToString(getType()); }
IMergeTreeDataPart(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DiskSpace::DiskPtr & disk = {},
const std::optional<String> & relative_path = {});
IMergeTreeDataPart(
MergeTreeData & storage_,
const String & name_,
const DiskSpace::DiskPtr & disk = {},
const std::optional<String> & relative_path = {});
void assertOnDisk() const;
void remove() const;
2019-10-16 18:27:53 +00:00
/// Initialize columns (from columns.txt if exists, or create from column files if not).
/// Load checksums from checksums.txt if exists. Load index if required.
void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency);
2019-10-28 11:00:29 +00:00
/// Marks file extension as stored in index_granularity_info.
String getMarksFileExtension() const
{
    return index_granularity_info.marks_file_extension;
}
2018-06-04 11:43:09 +00:00
/// Generate the new name for this part according to `new_part_info` and min/max dates from the old name.
/// This is useful when you want to change e.g. block numbers or the mutation version of the part.
2018-05-23 19:34:37 +00:00
String getNewName(const MergeTreePartInfo & new_part_info) const;
2019-10-31 14:44:17 +00:00
// Block sample_block;
std::optional<size_t> getColumnPosition(const String & column_name) const;
2019-10-31 14:44:17 +00:00
2019-10-10 16:30:30 +00:00
bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
/// If the partition key includes date column (a common case), these functions will return min and max values for this column.
DayNum getMinDate() const;
DayNum getMaxDate() const;
/// Otherwise, if the partition key includes a DateTime column (also a common case), these functions will return the min and max values for this column.
time_t getMinTime() const;
time_t getMaxTime() const;
2019-11-18 12:22:27 +00:00
void setColumns(const NamesAndTypesList & columns_);
2018-05-23 19:34:37 +00:00
/// A part is empty when its rows_count is zero.
bool isEmpty() const
{
    return !rows_count;
}
2018-10-17 03:13:00 +00:00
const MergeTreeData & storage;
2016-02-14 04:58:47 +00:00
String name;
MergeTreePartInfo info;
2019-10-16 18:27:53 +00:00
MergeTreeIndexGranularityInfo index_granularity_info;
2019-10-10 16:30:30 +00:00
DiskSpace::DiskPtr disk;
mutable String relative_path;
size_t rows_count = 0;
2019-10-10 16:30:30 +00:00
std::atomic<UInt64> bytes_on_disk {0}; /// 0 - if not counted;
/// Is used from several threads without locks (it is changed with ALTER).
/// May not contain size of checksums.txt and columns.txt
2019-10-10 16:30:30 +00:00
time_t modification_time = 0;
/// When the part is removed from the working set. Changes once.
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// If true, the destructor will delete the directory with the part.
bool is_temp = false;
2016-02-14 04:58:47 +00:00
/// If true, there is no ZooKeeper node for this part, so it should be deleted only from the filesystem.
bool is_duplicate = false;
/// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table.
mutable std::atomic<bool> is_frozen {false};
/**
* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
* Part state should be modified under data_parts mutex.
*
* Possible state transitions:
2019-09-04 18:26:18 +00:00
* Temporary -> Precommitted: we are trying to commit a fetched, inserted or merged part to the active set
* Precommitted -> Outdated: we could not add a part to the active set and are doing a rollback (for example, it is a duplicate part)
* Precommitted -> Committed: we successfully committed a part to the active dataset
* Committed -> Outdated: a part was replaced by a covering part or by DROP PARTITION
* Outdated -> Deleting: a cleaner selected this part for deletion
* Deleting -> Outdated: if a ZooKeeper error occurred during the deletion, we will retry the deletion
* Committed -> DeleteOnDestroy: the part was moved to another disk
*/
/// Lifetime stage of a part. Enumerators are listed in the order the stages are declared.
enum class State
{
2019-09-04 18:26:18 +00:00
Temporary, /// the part is being generated now, it is not in the data_parts list
PreCommitted, /// the part is in data_parts, but not used for SELECTs
Committed, /// active data part, used by current and upcoming SELECTs
Outdated, /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finish
Deleting, /// not active data part with identity refcounter, it is being deleted right now by a cleaner
DeleteOnDestroy, /// part was moved to another disk and should be deleted in its own destructor
};
using TTLInfo = MergeTreeDataPartTTLInfo;
using TTLInfos = MergeTreeDataPartTTLInfos;
TTLInfos ttl_infos;
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
mutable State state{State::Temporary};
/// Returns name of state
static String stateToString(State state);
String stateString() const;
/// Part name followed by its state, formatted as "<name> (state <state>)".
String getNameWithState() const
{
    String description = name;
    description += " (state ";
    description += stateString();
    description += ")";
    return description;
}
/// Returns true if state of part is one of affordable_states
bool checkState(const std::initializer_list<State> & affordable_states) const
{
for (auto affordable_state : affordable_states)
{
if (state == affordable_state)
return true;
}
return false;
}
/// Throws an exception if state of the part is not in affordable_states
void assertState(const std::initializer_list<State> & affordable_states) const;
/// Primary key (corresponds to the primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of the primary key tuple.
/// Note that marks (which also correspond to the primary key) are not always in RAM, but cached. See MarkCache.h.
using Index = Columns;
Index index;
2016-02-14 04:58:47 +00:00
MergeTreePartition partition;
2018-11-15 14:06:54 +00:00
/// Number of rows between marks.
/// Like the index, it is always loaded into memory.
MergeTreeIndexGranularity index_granularity;
2018-11-15 14:06:54 +00:00
/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding
/// parts based on conditions on these columns imposed by a query.
/// Currently this index is built using only columns required by partition expression, but in principle it
/// can be built using any set of columns.
struct MinMaxIndex
{
/// A direct product of ranges for each key column. See Storages/MergeTree/KeyCondition.cpp for details.
std::vector<Range> parallelogram;
bool initialized = false;
public:
MinMaxIndex() = default;
/// For month-based partitioning.
MinMaxIndex(DayNum min_date, DayNum max_date)
: parallelogram(1, Range(min_date, true, max_date, true))
, initialized(true)
{
}
void load(const MergeTreeData & storage, const String & part_path);
void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const;
void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const;
void update(const Block & block, const Names & column_names);
void merge(const MinMaxIndex & other);
};
MinMaxIndex minmax_idx;
Checksums checksums;
2016-02-14 04:58:47 +00:00
/// Columns description.
NamesAndTypesList columns;
2016-02-14 04:58:47 +00:00
/// Columns with values, that all have been zeroed by expired ttl
2019-10-10 16:30:30 +00:00
NameSet expired_columns;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/** It is blocked for writing when changing columns, checksums or any part files.
* Locked to read when reading columns, checksums or any part files.
*/
mutable std::shared_mutex columns_lock;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/** It is taken for the whole time ALTER a part: from the beginning of the recording of the temporary files to their renaming to permanent.
* It is taken with unlocked `columns_lock`.
*
2017-04-16 15:00:33 +00:00
* NOTE: This mutex would be unnecessary if you could turn ReadRWLock into WriteRWLock without releasing the lock.
* Such a transformation is impossible, because it would create a deadlock if two threads attempted it at once.
* Taking this mutex means that we want to lock columns_lock for reading with the intention to then,
* without unlocking, lock it for writing.
*/
mutable std::mutex alter_mutex;
2016-02-14 04:58:47 +00:00
ColumnSizeByName columns_sizes;
2019-10-10 16:30:30 +00:00
/// For data in RAM ('index')
UInt64 getIndexSizeInBytes() const;
UInt64 getIndexSizeInAllocatedBytes() const;
UInt64 getMarksCount() const;
2019-10-10 16:30:30 +00:00
size_t getFileSizeOrZero(const String & file_name) const;
String getFullPath() const;
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const;
void renameToDetached(const String & prefix) const;
void makeCloneInDetached(const String & prefix) const;
void makeCloneOnDiskDetached(const DiskSpace::ReservationPtr & reservation) const;
2019-06-07 19:16:42 +00:00
/// Checks that .bin and .mrk files exist
2019-12-09 21:21:17 +00:00
/// Base implementation performs no check, ignores both arguments and returns true.
virtual bool hasColumnFiles(const String & /* column */, const IDataType & /* type */) const
{
    return true;
}
2019-10-10 16:30:30 +00:00
static UInt64 calculateTotalSizeOnDisk(const String & from);
2017-08-16 19:22:49 +00:00
2019-11-18 12:22:27 +00:00
protected:
2019-11-18 12:22:27 +00:00
void removeIfNeeded();
2019-11-18 15:18:50 +00:00
virtual void checkConsistency(bool require_part_metadata) const;
2019-11-18 12:22:27 +00:00
2017-08-16 19:22:49 +00:00
private:
2019-11-18 12:22:27 +00:00
Block sample_block;
2019-10-31 14:44:17 +00:00
/// Reads columns names and types from columns.txt
void loadColumns(bool require);
/// If checksums.txt exists, reads files' checksums (and sizes) from it
void loadChecksums(bool require);
/// Loads marks index granularity into memory
virtual void loadIndexGranularity();
/// Loads index file.
void loadIndex();
/// Load rows count for this part from disk (for the newer storage format version).
/// For the older format version calculates rows count from the size of a column with a fixed size.
void loadRowsCount();
/// Loads TTL infos in JSON format from the file ttl.txt. If the file doesn't exist, assigns TTL infos with all zeros.
void loadTTLInfos();
void loadPartitionAndMinMaxIndex();
void loadColumnSizes();
2019-10-10 16:30:30 +00:00
String getRelativePathForDetachedPart(const String & prefix) const;
2016-02-14 04:58:47 +00:00
};
2019-10-10 16:30:30 +00:00
using MergeTreeDataPartState = IMergeTreeDataPart::State;
2019-10-21 15:33:59 +00:00
using MergeTreeDataPartPtr = std::shared_ptr<const IMergeTreeDataPart>;
bool isCompactPart(const MergeTreeDataPartPtr & data_part);
bool isWidePart(const MergeTreeDataPartPtr & data_part);
2016-02-14 04:58:47 +00:00
}