2014-09-10 11:34:26 +00:00
|
|
|
#pragma once
|
|
|
|
|
2020-10-26 12:40:55 +00:00
|
|
|
#include <Core/Names.h>
|
|
|
|
#include <Core/Field.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Stopwatch.h>
|
|
|
|
#include <Common/CurrentMetrics.h>
|
|
|
|
#include <Common/MemoryTracker.h>
|
2020-09-03 13:00:13 +00:00
|
|
|
#include <Storages/MergeTree/MergeType.h>
|
2020-10-26 12:40:55 +00:00
|
|
|
#include <Storages/MergeTree/MergeAlgorithm.h>
|
2021-06-24 14:07:43 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
2020-10-26 12:40:55 +00:00
|
|
|
#include <Storages/MergeTree/BackgroundProcessList.h>
|
2021-06-06 12:24:49 +00:00
|
|
|
#include <Interpreters/StorageID.h>
|
2020-10-26 12:40:55 +00:00
|
|
|
#include <boost/noncopyable.hpp>
|
2015-02-10 21:10:58 +00:00
|
|
|
#include <memory>
|
2014-09-10 11:34:26 +00:00
|
|
|
#include <list>
|
|
|
|
#include <mutex>
|
|
|
|
#include <atomic>
|
|
|
|
|
2016-10-24 04:06:27 +00:00
|
|
|
|
|
|
|
namespace CurrentMetrics
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const Metric Merge;
|
2016-10-24 04:06:27 +00:00
|
|
|
}
|
|
|
|
|
2014-09-10 11:34:26 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2015-04-16 06:12:35 +00:00
|
|
|
struct MergeInfo
|
2016-12-23 20:23:46 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string database;
|
|
|
|
std::string table;
|
|
|
|
std::string result_part_name;
|
2019-11-20 08:51:52 +00:00
|
|
|
std::string result_part_path;
|
2017-04-01 07:20:54 +00:00
|
|
|
Array source_part_names;
|
2019-11-20 08:51:52 +00:00
|
|
|
Array source_part_paths;
|
2018-09-11 11:16:40 +00:00
|
|
|
std::string partition_id;
|
2019-01-13 22:02:33 +00:00
|
|
|
bool is_mutation;
|
2017-04-01 07:20:54 +00:00
|
|
|
Float64 elapsed;
|
|
|
|
Float64 progress;
|
|
|
|
UInt64 num_parts;
|
|
|
|
UInt64 total_size_bytes_compressed;
|
|
|
|
UInt64 total_size_marks;
|
2019-03-26 12:37:42 +00:00
|
|
|
UInt64 total_rows_count;
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt64 bytes_read_uncompressed;
|
|
|
|
UInt64 bytes_written_uncompressed;
|
|
|
|
UInt64 rows_read;
|
|
|
|
UInt64 rows_written;
|
|
|
|
UInt64 columns_written;
|
|
|
|
UInt64 memory_usage;
|
2020-02-02 20:01:13 +00:00
|
|
|
UInt64 thread_id;
|
2020-09-03 13:00:13 +00:00
|
|
|
std::string merge_type;
|
2020-09-10 14:56:15 +00:00
|
|
|
std::string merge_algorithm;
|
2016-12-23 20:23:46 +00:00
|
|
|
};
|
|
|
|
|
2019-01-13 22:02:33 +00:00
|
|
|
/// Forward declaration: this header only refers to the type through a shared pointer.
struct FutureMergedMutatedPart;
|
2021-09-16 21:19:58 +00:00
|
|
|
/// Shared-ownership pointer to a FutureMergedMutatedPart.
using FutureMergedMutatedPartPtr = std::shared_ptr<FutureMergedMutatedPart>;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Since merge is executed with multiple threads, this class
|
|
|
|
* switches the parent MemoryTracker to account all the memory used.
|
|
|
|
*/
|
|
|
|
class MemoryTrackerThreadSwitcher : boost::noncopyable
|
|
|
|
{
|
|
|
|
public:
|
2021-10-13 20:47:28 +00:00
|
|
|
explicit MemoryTrackerThreadSwitcher(MemoryTracker * memory_tracker_ptr, UInt64 untracked_memory_limit, const std::string & query_id);
|
2021-09-16 21:19:58 +00:00
|
|
|
~MemoryTrackerThreadSwitcher();
|
|
|
|
private:
|
|
|
|
MemoryTracker * background_thread_memory_tracker;
|
|
|
|
MemoryTracker * background_thread_memory_tracker_prev_parent = nullptr;
|
2021-10-03 08:21:54 +00:00
|
|
|
UInt64 prev_untracked_memory_limit;
|
2021-10-13 20:47:28 +00:00
|
|
|
UInt64 prev_untracked_memory;
|
2021-10-13 20:47:28 +00:00
|
|
|
String prev_query_id;
|
2021-09-16 21:19:58 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Uniquely-owning pointer to a MemoryTrackerThreadSwitcher.
using MemoryTrackerThreadSwitcherPtr = std::unique_ptr<MemoryTrackerThreadSwitcher>;
|
2016-12-23 20:23:46 +00:00
|
|
|
|
|
|
|
struct MergeListElement : boost::noncopyable
|
2014-09-10 11:34:26 +00:00
|
|
|
{
|
2021-06-06 12:24:49 +00:00
|
|
|
const StorageID table_id;
|
2018-09-11 11:16:40 +00:00
|
|
|
std::string partition_id;
|
2019-01-14 12:25:25 +00:00
|
|
|
|
|
|
|
const std::string result_part_name;
|
2019-11-20 08:51:52 +00:00
|
|
|
const std::string result_part_path;
|
2021-06-24 14:07:43 +00:00
|
|
|
MergeTreePartInfo result_part_info;
|
2019-01-14 12:25:25 +00:00
|
|
|
bool is_mutation{};
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt64 num_parts{};
|
|
|
|
Names source_part_names;
|
2019-11-20 08:51:52 +00:00
|
|
|
Names source_part_paths;
|
2019-01-14 12:25:25 +00:00
|
|
|
Int64 source_data_version{};
|
|
|
|
|
|
|
|
Stopwatch watch;
|
|
|
|
std::atomic<Float64> progress{};
|
|
|
|
std::atomic<bool> is_cancelled{};
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt64 total_size_bytes_compressed{};
|
|
|
|
UInt64 total_size_marks{};
|
2019-03-26 12:37:42 +00:00
|
|
|
UInt64 total_rows_count{};
|
2017-04-01 07:20:54 +00:00
|
|
|
std::atomic<UInt64> bytes_read_uncompressed{};
|
|
|
|
std::atomic<UInt64> bytes_written_uncompressed{};
|
2016-11-03 12:00:44 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// In case of Vertical algorithm they are actual only for primary key columns
|
|
|
|
std::atomic<UInt64> rows_read{};
|
|
|
|
std::atomic<UInt64> rows_written{};
|
2016-11-22 19:34:36 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Updated only for Vertical algorithm
|
|
|
|
std::atomic<UInt64> columns_written{};
|
2016-11-03 12:00:44 +00:00
|
|
|
|
2018-06-09 15:29:08 +00:00
|
|
|
MemoryTracker memory_tracker{VariableContext::Process};
|
2021-10-03 08:21:54 +00:00
|
|
|
UInt64 max_untracked_memory;
|
2021-10-13 20:47:28 +00:00
|
|
|
std::string query_id;
|
2016-07-31 03:53:16 +00:00
|
|
|
|
2020-02-02 20:01:13 +00:00
|
|
|
UInt64 thread_id;
|
2020-09-03 13:00:13 +00:00
|
|
|
MergeType merge_type;
|
2020-10-27 15:27:12 +00:00
|
|
|
/// Detected after merge already started
|
|
|
|
std::atomic<MergeAlgorithm> merge_algorithm;
|
2016-12-23 20:23:46 +00:00
|
|
|
|
2021-10-03 08:21:54 +00:00
|
|
|
MergeListElement(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
FutureMergedMutatedPartPtr future_part,
|
|
|
|
UInt64 memory_profiler_step,
|
|
|
|
UInt64 memory_profiler_sample_probability,
|
|
|
|
UInt64 max_untracked_memory_);
|
2016-12-23 20:23:46 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
MergeInfo getInfo() const;
|
2016-12-23 20:23:46 +00:00
|
|
|
|
2021-09-24 13:57:44 +00:00
|
|
|
MergeListElement * ptr() { return this; }
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
~MergeListElement();
|
2015-04-16 06:12:35 +00:00
|
|
|
};
|
2014-09-10 11:34:26 +00:00
|
|
|
|
2020-10-26 12:40:55 +00:00
|
|
|
/// BackgroundProcessListEntry specialised for merges (element type MergeListElement, info type MergeInfo).
using MergeListEntry = BackgroundProcessListEntry<MergeListElement, MergeInfo>;
|
2014-09-10 11:34:26 +00:00
|
|
|
|
2020-10-26 12:40:55 +00:00
|
|
|
/** Maintains a list of currently running merges.
|
|
|
|
* For implementation of system.merges table.
|
|
|
|
*/
|
|
|
|
class MergeList final : public BackgroundProcessList<MergeListElement, MergeInfo>
|
2015-04-16 06:12:35 +00:00
|
|
|
{
|
2020-10-26 16:38:35 +00:00
|
|
|
private:
|
2020-10-26 12:40:55 +00:00
|
|
|
using Parent = BackgroundProcessList<MergeListElement, MergeInfo>;
|
2020-09-04 06:55:19 +00:00
|
|
|
std::atomic<size_t> merges_with_ttl_counter = 0;
|
2015-04-16 06:12:35 +00:00
|
|
|
public:
|
2020-10-26 12:40:55 +00:00
|
|
|
MergeList()
|
|
|
|
: Parent(CurrentMetrics::Merge)
|
|
|
|
{}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-10-26 12:40:55 +00:00
|
|
|
void onEntryDestroy(const Parent::Entry & entry) override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2020-10-26 12:40:55 +00:00
|
|
|
if (isTTLMergeType(entry->merge_type))
|
|
|
|
--merges_with_ttl_counter;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-01-14 12:25:25 +00:00
|
|
|
|
2021-06-06 12:24:49 +00:00
|
|
|
void cancelPartMutations(const StorageID & table_id, const String & partition_id, Int64 mutation_version)
|
2019-01-14 12:25:25 +00:00
|
|
|
{
|
|
|
|
std::lock_guard lock{mutex};
|
2020-10-26 12:40:55 +00:00
|
|
|
for (auto & merge_element : entries)
|
2019-01-14 12:25:25 +00:00
|
|
|
{
|
2019-02-04 12:53:25 +00:00
|
|
|
if ((partition_id.empty() || merge_element.partition_id == partition_id)
|
2021-06-06 12:24:49 +00:00
|
|
|
&& merge_element.table_id == table_id
|
2019-02-04 12:53:25 +00:00
|
|
|
&& merge_element.source_data_version < mutation_version
|
2021-06-24 14:07:43 +00:00
|
|
|
&& merge_element.result_part_info.getDataVersion() >= mutation_version)
|
|
|
|
merge_element.is_cancelled = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void cancelInPartition(const StorageID & table_id, const String & partition_id, Int64 delimiting_block_number)
|
|
|
|
{
|
2021-06-30 12:55:26 +00:00
|
|
|
std::lock_guard lock{mutex};
|
2021-06-24 14:07:43 +00:00
|
|
|
for (auto & merge_element : entries)
|
|
|
|
{
|
|
|
|
if (merge_element.table_id == table_id
|
|
|
|
&& merge_element.partition_id == partition_id
|
|
|
|
&& merge_element.result_part_info.min_block < delimiting_block_number)
|
2019-01-14 12:25:25 +00:00
|
|
|
merge_element.is_cancelled = true;
|
|
|
|
}
|
|
|
|
}
|
2020-09-04 06:55:19 +00:00
|
|
|
|
2021-01-27 11:56:12 +00:00
|
|
|
/// Merge consists of two parts: assignment and execution. We add merge to
|
|
|
|
/// merge list on execution, but checking merge list during merge
|
|
|
|
/// assignment. This lead to the logical race condition (we can assign more
|
|
|
|
/// merges with TTL than allowed). So we "book" merge with ttl during
|
|
|
|
/// assignment, and remove from list after merge execution.
|
|
|
|
///
|
|
|
|
/// NOTE: Not important for replicated merge tree, we check count of merges twice:
|
|
|
|
/// in assignment and in queue before execution.
|
|
|
|
void bookMergeWithTTL()
|
|
|
|
{
|
|
|
|
++merges_with_ttl_counter;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getMergesWithTTLCount() const
|
2020-09-04 06:55:19 +00:00
|
|
|
{
|
|
|
|
return merges_with_ttl_counter;
|
|
|
|
}
|
2014-09-10 11:34:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|