ClickHouse/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h

158 lines
5.9 KiB
C++
Raw Normal View History

2014-03-13 12:48:07 +00:00
#pragma once
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MutationCommands.h>
#include <atomic>
2016-03-01 17:47:53 +00:00
#include <functional>
#include <Common/ActionBlocker.h>
2014-03-13 12:48:07 +00:00
namespace DB
{
2015-04-16 06:12:35 +00:00
/// Forward declarations. MergeListEntry is only taken by reference in this header,
/// so its full definition is not needed here.
class MergeListEntry;
class MergeProgressCallback;
2015-04-16 06:12:35 +00:00
/** Auxiliary struct holding metainformation for the future merged or mutated part.
  *
  * The struct can be default-constructed empty and filled later through assign(),
  * or constructed directly from a vector of parts (which forwards to assign()).
  */
struct FutureMergedMutatedPart
{
    /// Name of the future part.
    String name;
    /// Part info of the future part.
    MergeTreePartInfo part_info;
    /// Parts this future part is built from (presumably the source parts of the merge/mutation).
    MergeTreeData::DataPartsVector parts;

    FutureMergedMutatedPart() = default;

    /// Takes the vector by value and moves it into assign().
    explicit FutureMergedMutatedPart(MergeTreeData::DataPartsVector parts_) { this->assign(std::move(parts_)); }

    /// Defined out of line. NOTE(review): presumably stores `parts_` and derives `name` / `part_info`
    /// from it - confirm against the implementation.
    void assign(MergeTreeData::DataPartsVector parts_);

    /// Partition of the first part in `parts`.
    /// Precondition: `parts` must be non-empty, because front() is called on it unconditionally.
    const MergeTreePartition & getPartition() const
    {
        return parts.front()->partition;
    }
};
2015-04-16 06:12:35 +00:00
2019-06-19 17:56:41 +00:00
2019-08-16 15:57:19 +00:00
/** Can select parts for background processes and do them.
* Currently helps with merges, mutations and moves
*/
class MergeTreeDataMergerMutator
2014-03-13 12:48:07 +00:00
{
2016-03-01 17:47:53 +00:00
public:
using AllowedMergingPredicate = std::function<bool (const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &, String * reason)>;
2016-03-01 17:47:53 +00:00
2014-03-13 12:48:07 +00:00
public:
2019-09-04 16:00:20 +00:00
MergeTreeDataMergerMutator(MergeTreeData & data_, size_t background_pool_size);
/** Get maximum total size of parts to do merge, at current moment of time.
* It depends on number of free threads in background_pool and amount of free space in disk.
*/
UInt64 getMaxSourcePartsSizeForMerge();
/** For explicitly passed size of pool and number of used tasks.
* This method could be used to calculate threshold depending on number of tasks in replication queue.
*/
UInt64 getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used);
/** Get maximum total size of parts to do mutation, at current moment of time.
* It depends only on amount of free space in disk.
*/
UInt64 getMaxSourcePartSizeForMutation();
2017-04-16 15:00:33 +00:00
/** Selects which parts to merge. Uses a lot of heuristics.
*
2017-04-16 15:00:33 +00:00
* can_merge - a function that determines if it is possible to merge a pair of adjacent parts.
* This function must coordinate merge with inserts and other merges, ensuring that
* - Parts between which another part can still appear can not be merged. Refer to METR-7001.
* - A part that already merges with something in one place, you can not start to merge into something else in another place.
*/
bool selectPartsToMerge(
FutureMergedMutatedPart & future_part,
bool aggressive,
size_t max_total_size_to_merge,
const AllowedMergingPredicate & can_merge,
String * out_disable_reason = nullptr);
2019-06-19 17:56:41 +00:00
2017-04-16 15:00:33 +00:00
/** Select all the parts in the specified partition for merge, if possible.
* final - choose to merge even a single part - that is, allow to merge one part "with itself".
*/
bool selectAllPartsToMergeWithinPartition(
FutureMergedMutatedPart & future_part,
2018-12-17 14:10:23 +00:00
UInt64 & available_disk_space,
const AllowedMergingPredicate & can_merge,
const String & partition_id,
bool final,
String * out_disable_reason = nullptr);
2017-04-16 15:00:33 +00:00
/** Merge the parts.
* If `reservation != nullptr`, now and then reduces the size of the reserved space
* is approximately proportional to the amount of data already written.
*
2017-04-16 15:00:33 +00:00
* Creates and returns a temporary part.
* To end the merge, call the function renameMergedTemporaryPart.
*
2017-04-16 15:00:33 +00:00
* time_of_merge - the time when the merge was assigned.
* Important when using ReplicatedGraphiteMergeTree to provide the same merge on replicas.
*/
MergeTreeData::MutableDataPartPtr mergePartsToTemporaryPart(
const FutureMergedMutatedPart & future_part,
2019-08-19 19:02:20 +00:00
MergeListEntry & merge_entry, TableStructureReadLockHolder & table_lock_holder, time_t time_of_merge,
2019-11-27 09:39:44 +00:00
const ReservationPtr & disk_reservation, bool deduplication, bool force_ttl);
/// Mutate a single data part with the specified commands. Will create and return a temporary part.
MergeTreeData::MutableDataPartPtr mutatePartToTemporaryPart(
const FutureMergedMutatedPart & future_part,
const std::vector<MutationCommand> & commands,
2019-08-21 10:09:29 +00:00
MergeListEntry & merge_entry, const Context & context,
2019-11-27 09:39:44 +00:00
const ReservationPtr & disk_reservation,
2019-08-21 10:09:29 +00:00
TableStructureReadLockHolder & table_lock_holder);
MergeTreeData::DataPartPtr renameMergedTemporaryPart(
MergeTreeData::MutableDataPartPtr & new_data_part,
const MergeTreeData::DataPartsVector & parts,
MergeTreeData::Transaction * out_transaction = nullptr);
2019-06-19 17:56:41 +00:00
/// The approximate amount of disk space needed for merge or mutation. With a surplus.
static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts);
2014-03-13 12:48:07 +00:00
2016-01-28 16:06:57 +00:00
private:
2017-04-16 15:00:33 +00:00
/** Select all parts belonging to the same partition.
*/
MergeTreeData::DataPartsVector selectAllPartsFromPartition(const String & partition_id);
public:
/** Is used to cancel all merges and mutations. On cancel() call all currently running actions will throw exception soon.
* All new attempts to start a merge or mutation will throw an exception until all 'LockHolder' objects will be destroyed.
*/
2019-08-01 15:36:12 +00:00
ActionBlocker merges_blocker;
ActionBlocker ttl_merges_blocker;
enum class MergeAlgorithm
{
Horizontal, /// per-row merge of all columns
Data Skipping Indices (#4143) * made index parser * added index parsing * some fixes * added index interface and factory * fixed compilation * ptrs * added indexParts * indextypes * index condition * IndexCondition * added indexes in selectexecutor * fix * changed comment * fix * added granularity * comments * fix * fix * added writing indexes * removed indexpart class * fix * added setSkipIndexes * add rw for MergeTreeIndexes * fixes * upd error * fix * fix * reading * test index * fixed nullptr error * fixed * fix * unique names * asts -> exprlist * minmax index * fix * fixed select * fixed merging * fixed mutation * working minmax * removed test index * fixed style * added indexes to checkDataPart * added tests for minmax index * fixed constructor * fix style * fixed includes * fixed setSkipIndexes * added indexes meta to zookeeper * added parsing * removed throw * alter cmds parse * fix * added alter * fix * alters fix * fix alters * fix "after" * fixed alter * alter fix + test * fixes * upd setSkipIndexes * fixed alter bug with drop all indices * fix metadata editing * new test and repl fix * rm test files * fixed repl alter * fix * fix * indices * MTReadStream * upd test for bug * fix * added useful parsers and ast classes * fix * fix comments * replaced columns * fix * fixed parsing * fixed printing * fix err * basic IndicesDescription * go to IndicesDescr * moved indices * go to indicesDescr * fix test minmax_index* * fixed MT alter * fixed bug with replMT indices storing in zk * rename * refactoring * docs ru * docs ru * docs en * refactor * rename tests * fix docs * refactoring * fix * fix * fix * fixed style * unique idx * unique * fix * better minmax calculation * upd * added getBlock * unique_condition * added termForAST * unique * fixed not * uniqueCondition::mayBeTrueOnGranule * fix * fixed bug with double column * is always true * fix * key set * spaces * test * tests * fix * unique * fix * fix * fixed bug with duplicate column * removed unused data 
* fix * fixes * __bitSwapLastTwo * fix
2019-02-05 14:50:25 +00:00
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
};
private:
MergeAlgorithm chooseMergeAlgorithm(
const MergeTreeData::DataPartsVector & parts,
size_t rows_upper_bound, const NamesAndTypesList & gathering_columns, bool deduplicate, bool need_remove_expired_values) const;
2014-03-13 12:48:07 +00:00
private:
MergeTreeData & data;
2019-09-04 16:00:20 +00:00
const size_t background_pool_size;
2014-03-13 12:48:07 +00:00
Logger * log;
2014-03-13 12:48:07 +00:00
2017-04-16 15:00:33 +00:00
/// When the last time you wrote to the log that the disk space was running out (not to write about this too often).
time_t disk_space_warning_time = 0;
/// Last time when TTLMergeSelector has been used
time_t last_merge_with_ttl = 0;
2014-03-13 12:48:07 +00:00
};
2014-03-13 12:48:07 +00:00
}