2016-02-14 04:58:47 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
#include <Core/Row.h>
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreePartition.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <Columns/IColumn.h>
#include <shared_mutex>
#include <vector>
|
2016-02-14 04:58:47 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
class MergeTreeData;
|
|
|
|
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// Description of the data part.
|
2017-08-14 18:16:11 +00:00
|
|
|
struct MergeTreeDataPart
|
2016-02-14 04:58:47 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
using Checksums = MergeTreeDataPartChecksums;
|
2017-03-24 13:52:50 +00:00
|
|
|
using Checksum = MergeTreeDataPartChecksums::Checksum;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-08-16 19:24:50 +00:00
|
|
|
/// Constructs a part description bound to the given storage, with the given part name and part info.
/// Only `storage`, `name` and `info` are set here; every other member keeps its in-class default.
MergeTreeDataPart(MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_)
    : storage(storage_)
    , name(name_)
    , info(info_)
{
}
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-08-25 20:41:45 +00:00
|
|
|
MergeTreeDataPart(MergeTreeData & storage_, const String & name_);
|
|
|
|
|
2017-12-04 15:44:31 +00:00
|
|
|
const Checksum * tryGetChecksum(const String & name, const String & ext) const;
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Returns checksum of column's binary file.
|
|
|
|
const Checksum * tryGetBinChecksum(const String & name) const;
|
2017-12-04 15:44:31 +00:00
|
|
|
/// Returns checksum of column's mrk file.
|
|
|
|
const Checksum * tryGetMrkChecksum(const String & name) const;
|
2017-03-24 13:52:50 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Returns the size of .bin file for column `name` if found, zero otherwise
|
2018-03-03 15:36:20 +00:00
|
|
|
UInt64 getColumnCompressedSize(const String & name) const;
|
|
|
|
UInt64 getColumnUncompressedSize(const String & name) const;
|
2017-12-04 15:44:31 +00:00
|
|
|
/// Returns the size of .mrk file for column `name` if found, zero otherwise
|
2018-03-03 15:36:20 +00:00
|
|
|
UInt64 getColumnMrkSize(const String & name) const;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Returns the name of a column with minimum compressed size (as returned by getColumnCompressedSize()).
|
|
|
|
/// If no checksums are present returns the name of the first physically existing column.
|
2017-04-01 07:20:54 +00:00
|
|
|
String getColumnNameWithMinumumCompressedSize() const;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Returns full path to part dir
|
|
|
|
String getFullPath() const;
|
|
|
|
|
2017-05-16 15:40:32 +00:00
|
|
|
/// Returns part->name with prefixes like 'tmp_<name>'
|
|
|
|
String getNameWithPrefix() const;
|
|
|
|
|
2017-08-14 18:16:11 +00:00
|
|
|
/// Returns the result of `info.contains(other.info)`, i.e. the check is delegated
/// entirely to the MergeTreePartInfo of the two parts.
bool contains(const MergeTreeDataPart & other) const
{
    return info.contains(other.info);
}
|
|
|
|
|
2017-08-21 15:35:29 +00:00
|
|
|
/// If the partition key includes date column (a common case), these functions will return min and max values for this column.
|
|
|
|
DayNum_t getMinDate() const;
|
|
|
|
DayNum_t getMaxDate() const;
|
2017-05-16 15:40:32 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
MergeTreeData & storage;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-08-14 18:16:11 +00:00
|
|
|
String name;
|
|
|
|
MergeTreePartInfo info;
|
|
|
|
|
2018-01-23 22:56:46 +00:00
|
|
|
/// A directory path (relative to storage's path) where part data is actually stored
|
2017-05-26 00:47:06 +00:00
|
|
|
/// Examples: 'detached/tmp_fetch_<name>', 'tmp_<name>', '<name>'
|
2017-05-16 15:40:32 +00:00
|
|
|
mutable String relative_path;
|
|
|
|
|
2017-10-24 14:11:53 +00:00
|
|
|
size_t rows_count = 0;
|
|
|
|
size_t marks_count = 0;
|
2018-03-03 15:36:20 +00:00
|
|
|
std::atomic<UInt64> size_in_bytes {0}; /// size in bytes, 0 - if not counted;
|
2017-05-16 15:40:32 +00:00
|
|
|
/// is used from several threads without locks (it is changed with ALTER).
|
2017-04-01 07:20:54 +00:00
|
|
|
time_t modification_time = 0;
|
2018-03-14 15:57:13 +00:00
|
|
|
/// When the part is removed from the working set. Changes once.
|
2018-03-14 16:04:19 +00:00
|
|
|
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// If true, the destructor will delete the directory with the part.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool is_temp = false;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-10-06 11:30:57 +00:00
|
|
|
/// If true, there is no ZooKeeper node for this part, so it should be deleted only from the filesystem.
|
|
|
|
bool is_duplicate = false;
|
|
|
|
|
2017-09-05 19:03:51 +00:00
|
|
|
/**
|
|
|
|
* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
|
|
|
|
* Part state should be modified under data_parts mutex.
|
|
|
|
*
|
|
|
|
* Possible state transitions:
|
|
|
|
* Temporary -> Precommitted: we are trying to commit a fetched, inserted or merged part to active set
|
|
|
|
* Precommitted -> Outdated: we could not add the part to the active set and are doing a rollback (for example, it is a duplicate part)
|
|
|
|
* Precommitted -> Committed: we successfully committed the part to the active set
|
|
|
|
* Committed -> Outdated: the part was replaced by a covering part or by DROP PARTITION
|
|
|
|
* Outdated -> Deleting: a cleaner selected this part for deletion
|
2017-10-06 11:30:57 +00:00
|
|
|
* Deleting -> Outdated: if a ZooKeeper error occurred during the deletion, we will retry the deletion
|
2017-09-05 19:03:51 +00:00
|
|
|
*/
|
|
|
|
enum class State
{
    /// The part is being generated right now; it is not in the data_parts list.
    Temporary,

    /// The part is in data_parts, but is not used for SELECTs.
    PreCommitted,

    /// Active data part, used by current and upcoming SELECTs.
    Committed,

    /// Not an active data part, but may still be used by SELECTs that are already running;
    /// may be deleted after those SELECTs finish.
    Outdated,

    /// Not an active data part, with identity refcounter (presumably: nothing else references it - confirm);
    /// a cleaner is deleting it right now.
    Deleting
};
|
|
|
|
|
2017-09-21 21:51:17 +00:00
|
|
|
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
|
2017-09-11 22:40:51 +00:00
|
|
|
mutable State state{State::Temporary};
|
2017-09-05 19:03:51 +00:00
|
|
|
|
2017-09-11 22:40:51 +00:00
|
|
|
/// Returns name of state
|
|
|
|
static String stateToString(State state);
|
|
|
|
String stateString() const;
|
|
|
|
|
|
|
|
/// Returns the part name followed by its state in the form "<name> (state <state>)",
/// where <state> is whatever stateString() returns.
String getNameWithState() const
{
    String name_with_state = name;
    name_with_state += " (state ";
    name_with_state += stateString();
    name_with_state += ")";
    return name_with_state;
}
|
|
|
|
|
|
|
|
/// Returns true if state of part is one of affordable_states
|
2017-09-21 21:51:17 +00:00
|
|
|
bool checkState(const std::initializer_list<State> & affordable_states) const
|
2017-09-11 22:40:51 +00:00
|
|
|
{
|
|
|
|
for (auto affordable_state : affordable_states)
|
|
|
|
{
|
|
|
|
if (state == affordable_state)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Throws an exception if state of the part is not in affordable_states
|
2017-11-20 19:33:12 +00:00
|
|
|
void assertState(const std::initializer_list<State> & affordable_states) const;
|
2017-09-11 22:40:51 +00:00
|
|
|
|
2017-09-21 21:51:17 +00:00
|
|
|
/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
|
|
|
|
struct StatesFilter
|
|
|
|
{
|
|
|
|
std::initializer_list<State> affordable_states;
|
|
|
|
StatesFilter(const std::initializer_list<State> & affordable_states) : affordable_states(affordable_states) {}
|
|
|
|
|
|
|
|
bool operator() (const std::shared_ptr<const MergeTreeDataPart> & part) const
|
|
|
|
{
|
|
|
|
return part->checkState(affordable_states);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-09-11 22:40:51 +00:00
|
|
|
/// Returns a lambda that returns true only for part with states from specified list
|
2017-09-21 21:51:17 +00:00
|
|
|
static inline StatesFilter getStatesFilter(const std::initializer_list<State> & affordable_states)
|
2017-09-11 22:40:51 +00:00
|
|
|
{
|
2017-09-21 21:51:17 +00:00
|
|
|
return StatesFilter(affordable_states);
|
2017-09-11 22:40:51 +00:00
|
|
|
}
|
2017-09-05 19:03:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Primary key (correspond to primary.idx file).
|
|
|
|
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
|
|
|
|
/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
|
|
|
|
using Index = Columns;
|
|
|
|
Index index;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-09-11 17:55:41 +00:00
|
|
|
MergeTreePartition partition;
|
2017-08-31 15:40:34 +00:00
|
|
|
|
|
|
|
/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding
|
|
|
|
/// parts based on conditions on these columns imposed by a query.
|
|
|
|
/// Currently this index is built using only columns required by partition expression, but in principle it
|
|
|
|
/// can be built using any set of columns.
|
|
|
|
struct MinMaxIndex
|
|
|
|
{
|
|
|
|
Row min_values;
|
|
|
|
Row max_values;
|
|
|
|
bool initialized = false;
|
|
|
|
|
|
|
|
public:
|
|
|
|
MinMaxIndex() = default;
|
|
|
|
|
|
|
|
/// For month-based partitioning.
|
|
|
|
MinMaxIndex(DayNum_t min_date, DayNum_t max_date)
|
|
|
|
: min_values(1, static_cast<UInt64>(min_date))
|
|
|
|
, max_values(1, static_cast<UInt64>(max_date))
|
|
|
|
, initialized(true)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
void load(const MergeTreeData & storage, const String & part_path);
|
|
|
|
void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const;
|
|
|
|
|
|
|
|
void update(const Block & block, const Names & column_names);
|
|
|
|
void merge(const MinMaxIndex & other);
|
|
|
|
};
|
|
|
|
|
2017-08-18 19:46:26 +00:00
|
|
|
MinMaxIndex minmax_idx;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Checksums checksums;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Columns description.
|
2017-04-01 07:20:54 +00:00
|
|
|
NamesAndTypesList columns;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2018-03-03 15:36:20 +00:00
|
|
|
using ColumnToSize = std::map<std::string, UInt64>;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** It is blocked for writing when changing columns, checksums or any part files.
|
|
|
|
* Locked to read when reading columns, checksums or any part files.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2017-07-28 17:34:02 +00:00
|
|
|
mutable std::shared_mutex columns_lock;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** It is taken for the whole time ALTER a part: from the beginning of the recording of the temporary files to their renaming to permanent.
|
|
|
|
* It is taken with unlocked `columns_lock`.
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-04-16 15:00:33 +00:00
|
|
|
* NOTE: "You can" do without this mutex if you could turn ReadRWLock into WriteRWLock without removing the lock.
|
|
|
|
* This transformation is impossible, because it would create a deadlock, if you do it from two threads at once.
|
|
|
|
* Taking this mutex means that we want to lock columns_lock on read with intention then, not
|
|
|
|
* unblocking, block it for writing.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
mutable std::mutex alter_mutex;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
~MergeTreeDataPart();
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// Calculate the total size of the entire directory with all the files
|
2018-03-03 15:36:20 +00:00
|
|
|
static UInt64 calculateTotalSize(const String & from);
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void remove() const;
|
2017-05-16 15:40:32 +00:00
|
|
|
|
|
|
|
/// Makes checks and move part to new directory
|
|
|
|
/// Changes only relative_path; you need to update other metadata (name, is_temp) explicitly
|
|
|
|
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = true) const;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-06-25 00:17:08 +00:00
|
|
|
/// Renames a part by appending a prefix to the name. To_detached - also moved to the detached directory.
|
2017-04-01 07:20:54 +00:00
|
|
|
void renameAddPrefix(bool to_detached, const String & prefix) const;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Populates the column_to_size map (compressed size).
|
2017-04-01 07:20:54 +00:00
|
|
|
void accumulateColumnSizes(ColumnToSize & column_to_size) const;
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-08-16 19:22:49 +00:00
|
|
|
/// Initialize columns (from columns.txt if exists, or create from column files if not).
|
|
|
|
/// Load checksums from checksums.txt if exists. Load index if required.
|
2017-08-16 19:24:50 +00:00
|
|
|
void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency);
|
2016-02-14 04:58:47 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// Checks that .bin and .mrk files exist
|
2017-04-01 07:20:54 +00:00
|
|
|
bool hasColumnFiles(const String & column) const;
|
2017-01-17 20:49:16 +00:00
|
|
|
|
2017-03-24 13:52:50 +00:00
|
|
|
/// For data in RAM ('index')
|
2018-03-03 15:36:20 +00:00
|
|
|
UInt64 getIndexSizeInBytes() const;
|
|
|
|
UInt64 getIndexSizeInAllocatedBytes() const;
|
2017-12-04 15:44:31 +00:00
|
|
|
/// Total size of *.mrk files
|
2018-03-03 15:36:20 +00:00
|
|
|
UInt64 getTotalMrkSizeInBytes() const;
|
2017-08-16 19:22:49 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/// Reads columns names and types from columns.txt
|
|
|
|
void loadColumns(bool require);
|
|
|
|
|
|
|
|
/// If checksums.txt exists, reads files' checksums (and sizes) from it
|
|
|
|
void loadChecksums(bool require);
|
|
|
|
|
2017-10-24 14:11:53 +00:00
|
|
|
/// Loads index file. Also calculates this->marks_count if marks_count = 0
|
2017-08-16 19:22:49 +00:00
|
|
|
void loadIndex();
|
|
|
|
|
2017-10-24 14:11:53 +00:00
|
|
|
/// Load rows count for this part from disk (for the newer storage format version).
|
|
|
|
/// For the older format version calculates rows count from the size of a column with a fixed size.
|
|
|
|
void loadRowsCount();
|
|
|
|
|
2017-08-18 19:46:26 +00:00
|
|
|
void loadPartitionAndMinMaxIndex();
|
2017-08-16 19:24:50 +00:00
|
|
|
|
2017-08-16 19:22:49 +00:00
|
|
|
void checkConsistency(bool require_part_metadata);
|
2016-02-14 04:58:47 +00:00
|
|
|
};
|
|
|
|
|
2017-11-20 19:33:12 +00:00
|
|
|
|
|
|
|
using MergeTreeDataPartState = MergeTreeDataPart::State;
|
|
|
|
|
2016-02-14 04:58:47 +00:00
|
|
|
}
|