ClickHouse/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp

312 lines
9.7 KiB
C++
Raw Normal View History

2021-03-31 15:20:30 +00:00
#include <Storages/MergeTree/MergeTreeDeduplicationLog.h>
#include <filesystem>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/trim.hpp>
2021-04-02 16:45:18 +00:00
#include <IO/ReadBufferFromFile.h>
2021-04-02 12:37:42 +00:00
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
2021-03-31 15:20:30 +00:00
namespace DB
{
namespace
{
2021-04-02 17:46:01 +00:00
/// Deduplication log operation: a part was either added or dropped
2021-04-02 11:46:42 +00:00
/// Kind of a single deduplication log record.
/// The numeric value is what writeRecord() stores in the log file and what
/// readRecord() casts back, so the existing values must stay stable.
enum class MergeTreeDeduplicationOp : uint8_t
{
/// Record produced by addPart(): block_id is remembered together with its part
ADD = 1,
/// Record produced by dropPart(): a remembered block_id is forgotten
DROP = 2,
};
2021-04-02 17:46:01 +00:00
/// Record for deduplication on disk
2021-04-02 11:46:42 +00:00
/// One line of the deduplication log.
/// Serialized by writeRecord() as "<operation>\t<part_name>\t<block_id>\n".
struct MergeTreeDeduplicationLogRecord
{
/// Whether the block is being added to or dropped from the history
MergeTreeDeduplicationOp operation;
/// Name of the part; parsed back with MergeTreePartInfo::fromPartName() on load
std::string part_name;
/// Block identifier; used as the key of the in-memory deduplication map
std::string block_id;
};
void writeRecord(const MergeTreeDeduplicationLogRecord & record, WriteBuffer & out)
{
writeIntText(static_cast<uint8_t>(record.operation), out);
2021-04-02 16:45:18 +00:00
writeChar('\t', out);
writeString(record.part_name, out);
writeChar('\t', out);
writeString(record.block_id, out);
2021-04-02 11:46:42 +00:00
writeChar('\n', out);
2021-04-02 16:45:18 +00:00
out.next();
2021-04-02 11:46:42 +00:00
}
/// Parses one text line produced by writeRecord() into `record`.
/// The separators ('\t' between fields, '\n' at the end) are checked with assertChar().
void readRecord(MergeTreeDeduplicationLogRecord & record, ReadBuffer & in)
{
    /// Operation is stored as a plain integer, convert it back to the enum
    uint8_t operation_code;
    readIntText(operation_code, in);
    record.operation = static_cast<MergeTreeDeduplicationOp>(operation_code);

    assertChar('\t', in);
    readString(record.part_name, in);

    assertChar('\t', in);
    readString(record.block_id, in);

    assertChar('\n', in);
}
2021-03-31 15:20:30 +00:00
/// Builds the path of the log file with the given number inside directory `prefix`:
/// <prefix>/deduplication_log_<number>.txt
std::string getLogPath(const std::string & prefix, size_t number)
{
    const std::string file_name = std::string{"deduplication_log_"} + std::to_string(number) + ".txt";
    return (std::filesystem::path(prefix) / std::filesystem::path(file_name)).string();
}
/// Extracts the log number from a path of the form <dir>/deduplication_log_<number>.txt
/// (the layout produced by getLogPath()).
/// The file stem is split by '_' and the third component is parsed as the number.
/// Throws std::out_of_range if the stem has fewer than three '_'-separated components.
size_t getLogNumber(const std::string & path_str)
{
    std::filesystem::path path(path_str);
    std::string filename = path.stem();

    Strings filename_parts;
    boost::split(filename_parts, filename, boost::is_any_of("_"));

    /// The name comes from a directory listing, so it may belong to an unrelated file.
    /// at() turns a too-short name into an exception instead of an out-of-bounds read.
    return parse<size_t>(filename_parts.at(2));
}
}
/// Remembers the settings and makes sure the logs directory exists
/// when deduplication is enabled (non-zero window).
MergeTreeDeduplicationLog::MergeTreeDeduplicationLog(
    const std::string & logs_dir_,
    size_t deduplication_window_,
    const MergeTreeDataFormatVersion & format_version_)
    : logs_dir(logs_dir_)
    , deduplication_window(deduplication_window_)
    , rotate_interval(deduplication_window_ * 2) /// actually it doesn't matter
    , format_version(format_version_)
    , deduplication_map(deduplication_window)
{
    /// Zero window: no directory is created here
    if (deduplication_window == 0)
        return;

    if (!std::filesystem::exists(logs_dir))
        std::filesystem::create_directories(logs_dir);
}
2021-03-31 15:20:30 +00:00
void MergeTreeDeduplicationLog::load()
{
namespace fs = std::filesystem;
if (!fs::exists(logs_dir))
2021-04-06 10:14:44 +00:00
return;
2021-03-31 15:20:30 +00:00
for (const auto & p : fs::directory_iterator(logs_dir))
{
2021-04-03 14:57:03 +00:00
const auto & path = p.path();
2021-03-31 15:20:30 +00:00
auto log_number = getLogNumber(path);
2021-04-02 11:46:42 +00:00
existing_logs[log_number] = {path, 0};
2021-03-31 15:20:30 +00:00
}
2021-04-02 11:46:42 +00:00
2021-04-06 10:14:44 +00:00
/// We should know which logs are exist even in case
/// of deduplication_window = 0
if (!existing_logs.empty())
current_log_number = existing_logs.rbegin()->first;
if (deduplication_window != 0)
2021-04-02 11:46:42 +00:00
{
2021-04-06 10:14:44 +00:00
/// Order important, we load history from the begging to the end
for (auto & [log_number, desc] : existing_logs)
2021-04-02 16:56:02 +00:00
{
2021-04-06 10:14:44 +00:00
try
{
desc.entries_count = loadSingleLog(desc.path);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__, "Error while loading MergeTree deduplication log on path " + desc.path);
}
2021-04-02 16:56:02 +00:00
}
2021-04-02 11:46:42 +00:00
2021-04-06 10:14:44 +00:00
/// Start new log, drop previous
rotateAndDropIfNeeded();
2021-04-02 17:46:01 +00:00
2021-04-06 10:14:44 +00:00
/// Can happen in case we have unfinished log
if (!current_writer)
current_writer = std::make_unique<WriteBufferFromFile>(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
}
2021-03-31 15:20:30 +00:00
}
2021-04-02 11:46:42 +00:00
size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path)
2021-03-31 15:20:30 +00:00
{
ReadBufferFromFile read_buf(path);
2021-04-02 11:46:42 +00:00
size_t total_entries = 0;
2021-03-31 15:20:30 +00:00
while (!read_buf.eof())
{
2021-04-02 11:46:42 +00:00
MergeTreeDeduplicationLogRecord record;
readRecord(record, read_buf);
if (record.operation == MergeTreeDeduplicationOp::DROP)
deduplication_map.erase(record.block_id);
else
deduplication_map.insert(record.block_id, MergeTreePartInfo::fromPartName(record.part_name, format_version));
total_entries++;
2021-03-31 15:20:30 +00:00
}
2021-04-02 11:46:42 +00:00
return total_entries;
2021-03-31 15:20:30 +00:00
}
void MergeTreeDeduplicationLog::rotate()
{
2021-04-06 10:14:44 +00:00
/// We don't deduplicate anything so we don't need any writers
if (deduplication_window == 0)
return;
2021-04-02 11:46:42 +00:00
current_log_number++;
auto new_path = getLogPath(logs_dir, current_log_number);
MergeTreeDeduplicationLogNameDescription log_description{new_path, 0};
existing_logs.emplace(current_log_number, log_description);
if (current_writer)
current_writer->sync();
current_writer = std::make_unique<WriteBufferFromFile>(log_description.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
}
void MergeTreeDeduplicationLog::dropOutdatedLogs()
{
size_t current_sum = 0;
2021-04-02 12:37:42 +00:00
size_t remove_from_value = 0;
2021-04-02 17:46:01 +00:00
/// Go from end to the beginning
2021-04-02 12:37:42 +00:00
for (auto itr = existing_logs.rbegin(); itr != existing_logs.rend(); ++itr)
2021-04-02 11:46:42 +00:00
{
if (current_sum > deduplication_window)
{
2021-04-02 17:46:01 +00:00
/// We have more logs than required, all older files (including current) can be dropped
2021-04-02 12:37:42 +00:00
remove_from_value = itr->first;
break;
2021-04-02 11:46:42 +00:00
}
2021-04-02 17:46:01 +00:00
auto & description = itr->second;
2021-04-02 12:37:42 +00:00
current_sum += description.entries_count;
}
2021-04-02 17:46:01 +00:00
/// If we found some logs to drop
2021-04-02 12:37:42 +00:00
if (remove_from_value != 0)
{
2021-04-06 10:14:44 +00:00
/// Go from the beginning to the end and drop all outdated logs
2021-04-02 12:37:42 +00:00
for (auto itr = existing_logs.begin(); itr != existing_logs.end();)
2021-04-02 11:46:42 +00:00
{
2021-04-02 13:34:36 +00:00
size_t number = itr->first;
std::filesystem::remove(itr->second.path);
itr = existing_logs.erase(itr);
if (remove_from_value == number)
break;
2021-04-02 11:46:42 +00:00
}
}
2021-04-02 12:37:42 +00:00
2021-04-02 11:46:42 +00:00
}
void MergeTreeDeduplicationLog::rotateAndDropIfNeeded()
{
2021-04-02 17:46:01 +00:00
/// If we don't have logs at all or already have enough records in current
2021-04-02 16:45:18 +00:00
if (existing_logs.empty() || existing_logs[current_log_number].entries_count >= rotate_interval)
2021-04-02 11:46:42 +00:00
{
rotate();
dropOutdatedLogs();
}
}
2021-04-02 16:45:18 +00:00
std::pair<MergeTreePartInfo, bool> MergeTreeDeduplicationLog::addPart(const std::string & block_id, const MergeTreePartInfo & part_info)
2021-04-02 11:46:42 +00:00
{
std::lock_guard lock(state_mutex);
2021-03-31 15:20:30 +00:00
2021-04-06 10:14:44 +00:00
/// We support zero case because user may want to disable deduplication with
2021-04-06 10:16:34 +00:00
/// ALTER MODIFY SETTING query. It's much more simpler to handle zero case
2021-04-06 10:14:44 +00:00
/// here then destroy whole object, check for null pointer from different
/// threads and so on.
if (deduplication_window == 0)
return std::make_pair(part_info, true);
2021-04-02 17:51:57 +00:00
/// If we already have this block let's deduplicate it
2021-04-02 11:46:42 +00:00
if (deduplication_map.contains(block_id))
2021-04-02 16:45:18 +00:00
{
auto info = deduplication_map.get(block_id);
return std::make_pair(info, false);
}
2021-03-31 15:20:30 +00:00
2021-04-02 11:46:42 +00:00
assert(current_writer != nullptr);
2021-04-02 16:45:18 +00:00
2021-04-02 17:46:01 +00:00
/// Create new record
2021-04-02 11:46:42 +00:00
MergeTreeDeduplicationLogRecord record;
record.operation = MergeTreeDeduplicationOp::ADD;
2021-04-02 16:45:18 +00:00
record.part_name = part_info.getPartName();
2021-04-02 11:46:42 +00:00
record.block_id = block_id;
2021-04-02 17:46:01 +00:00
/// Write it to disk
2021-04-02 11:46:42 +00:00
writeRecord(record, *current_writer);
2021-04-02 17:46:01 +00:00
/// We have one more record in current log
2021-04-02 11:46:42 +00:00
existing_logs[current_log_number].entries_count++;
2021-04-02 17:46:01 +00:00
/// Add to deduplication map
2021-04-02 16:45:18 +00:00
deduplication_map.insert(record.block_id, part_info);
2021-04-02 17:46:01 +00:00
/// Rotate and drop old logs if needed
2021-04-02 16:45:18 +00:00
rotateAndDropIfNeeded();
2021-04-02 11:46:42 +00:00
2021-04-02 16:45:18 +00:00
return std::make_pair(part_info, true);
2021-04-02 11:46:42 +00:00
}
2021-04-02 16:45:18 +00:00
void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_info)
2021-04-02 11:46:42 +00:00
{
std::lock_guard lock(state_mutex);
2021-04-06 10:14:44 +00:00
/// We support zero case because user may want to disable deduplication with
2021-04-06 10:16:34 +00:00
/// ALTER MODIFY SETTING query. It's much more simpler to handle zero case
2021-04-06 10:14:44 +00:00
/// here then destroy whole object, check for null pointer from different
/// threads and so on.
if (deduplication_window == 0)
return;
2021-04-02 11:46:42 +00:00
assert(current_writer != nullptr);
2021-04-02 17:46:01 +00:00
for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */)
2021-04-02 11:46:42 +00:00
{
2021-04-02 13:34:36 +00:00
const auto & part_info = itr->value;
2021-04-02 17:46:01 +00:00
/// Part is covered by dropped part, let's remove it from
/// deduplication history
2021-04-02 16:45:18 +00:00
if (drop_part_info.contains(part_info))
2021-04-02 11:46:42 +00:00
{
2021-04-02 17:46:01 +00:00
/// Create drop record
2021-04-02 13:34:36 +00:00
MergeTreeDeduplicationLogRecord record;
2021-04-02 11:46:42 +00:00
record.operation = MergeTreeDeduplicationOp::DROP;
record.part_name = part_info.getPartName();
2021-04-02 13:34:36 +00:00
record.block_id = itr->key;
2021-04-02 17:46:01 +00:00
/// Write it to disk
2021-04-02 11:46:42 +00:00
writeRecord(record, *current_writer);
2021-04-02 17:46:01 +00:00
/// We have one more record on disk
2021-04-02 11:46:42 +00:00
existing_logs[current_log_number].entries_count++;
2021-04-02 17:46:01 +00:00
/// Increment itr before erase, otherwise it will invalidated
2021-04-02 16:45:18 +00:00
++itr;
2021-04-02 17:46:01 +00:00
/// Remove block_id from in-memory table
2021-04-02 16:45:18 +00:00
deduplication_map.erase(record.block_id);
2021-04-02 17:46:01 +00:00
/// Rotate and drop old logs if needed
2021-04-02 11:46:42 +00:00
rotateAndDropIfNeeded();
}
2021-04-02 16:45:18 +00:00
else
2021-04-02 11:46:42 +00:00
{
2021-04-02 16:45:18 +00:00
++itr;
2021-04-02 11:46:42 +00:00
}
}
2021-03-31 15:20:30 +00:00
}
2021-04-06 10:14:44 +00:00
/// Applies a new deduplication window (e.g. after ALTER MODIFY SETTING):
/// updates the rotation interval and the in-memory map limit, creates the logs
/// directory if it is needed for the first time, rotates/drops logs and makes sure
/// a writer exists when deduplication is enabled.
/// A zero window disables deduplication; no writer is created in that case.
void MergeTreeDeduplicationLog::setDeduplicationWindowSize(size_t deduplication_window_)
{
    std::lock_guard lock(state_mutex);

    deduplication_window = deduplication_window_;
    rotate_interval = deduplication_window * 2;

    /// If settings was set for the first time with ALTER MODIFY SETTING query
    if (deduplication_window != 0 && !std::filesystem::exists(logs_dir))
        std::filesystem::create_directories(logs_dir);

    deduplication_map.setMaxSize(deduplication_window);
    rotateAndDropIfNeeded();

    /// Can happen in case we have unfinished log.
    /// With a zero window rotate() does nothing, so existing_logs may still be empty here
    /// and rbegin() must not be dereferenced; a writer is not needed in that case anyway.
    /// With a non-zero window rotateAndDropIfNeeded() guarantees at least one log exists.
    if (deduplication_window != 0 && !current_writer)
        current_writer = std::make_unique<WriteBufferFromFile>(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
}
2021-03-31 15:20:30 +00:00
}