ClickHouse/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp

337 lines
10 KiB
C++
Raw Normal View History

2020-04-14 19:47:19 +00:00
#include <Storages/MergeTree/MergeTreeWriteAheadLog.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreeData.h>
2021-08-26 11:01:15 +00:00
#include <Storages/MergeTree/MergeTreeDataWriter.h>
2020-04-14 19:47:19 +00:00
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeDataPartState.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
2020-10-29 16:18:25 +00:00
#include <IO/MemoryReadWriteBuffer.h>
#include <IO/ReadHelpers.h>
2020-10-29 16:18:25 +00:00
#include <IO/copyData.h>
2022-07-20 20:30:16 +00:00
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>
2020-11-02 22:36:32 +00:00
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
#include <Poco/JSON/Parser.h>
2020-09-01 22:25:10 +00:00
#include <sys/time.h>
2020-04-14 19:47:19 +00:00
namespace DB
{
/// Error codes referenced in this translation unit (declared here, defined elsewhere).
namespace ErrorCodes
{
    extern const int BAD_DATA_PART_NAME;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int CORRUPTED_DATA;
    extern const int UNKNOWN_FORMAT_VERSION;
}
/// Binds the log to `storage_` / `disk_`, derives the file path from the storage's
/// relative data path plus `name_`, opens the output via init() and creates the
/// background sync task.
MergeTreeWriteAheadLog::MergeTreeWriteAheadLog(
    MergeTreeData & storage_,
    const DiskPtr & disk_,
    const String & name_)
    : storage(storage_)
    , disk(disk_)
    , name(name_)
    , path(storage.getRelativeDataPath() + name_)
    , pool(storage.getContext()->getSchedulePool())
    , log(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)"))
{
    init();

    /// Body of the background task: sync the output buffer under write_mutex,
    /// clear the "sync pending" flag and wake everyone waiting on sync_cv.
    /// NOTE(review): dereferences `out` unconditionally; shutdown() resets `out`,
    /// so this relies on the task never running after shutdown — confirm.
    auto sync_job = [this]
    {
        std::lock_guard guard(write_mutex);
        out->sync();
        sync_scheduled = false;
        sync_cv.notify_all();
    };

    sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", sync_job);
}
2020-04-14 19:47:19 +00:00
2020-09-11 13:09:26 +00:00
/// Shuts the log down on destruction. Nothing may escape a destructor,
/// so any exception thrown by shutdown() is logged and swallowed.
MergeTreeWriteAheadLog::~MergeTreeWriteAheadLog()
{
    try
    {
        shutdown();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
2020-04-14 19:47:19 +00:00
2022-09-20 16:28:15 +00:00
/// Removes every file in `relative_data_path` on `disk_to_drop` whose name
/// starts with WAL_FILE_NAME.
void MergeTreeWriteAheadLog::dropAllWriteAheadLogs(DiskPtr disk_to_drop, std::string relative_data_path)
{
    std::vector<std::string> entries;
    disk_to_drop->listFiles(relative_data_path, entries);

    const fs::path base_dir(relative_data_path);
    for (const auto & entry : entries)
    {
        /// Anything that is not a WAL file is left alone.
        if (!entry.starts_with(WAL_FILE_NAME))
            continue;

        disk_to_drop->removeFile(base_dir / entry);
    }
}
void MergeTreeWriteAheadLog::init()
{
out = disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
2022-05-09 19:13:02 +00:00
/// Small hack: in NativeWriter header is used only in `getHeader` method.
/// To avoid complex logic of changing it during ALTERs we leave it empty.
2021-10-08 17:21:19 +00:00
block_out = std::make_unique<NativeWriter>(*out, 0, Block{});
min_block_number = std::numeric_limits<Int64>::max();
2020-05-14 20:08:15 +00:00
max_block_number = -1;
2020-09-01 22:25:10 +00:00
bytes_at_last_sync = 0;
}
2020-04-14 19:47:19 +00:00
2020-05-29 15:02:12 +00:00
void MergeTreeWriteAheadLog::dropPart(const String & part_name)
{
2020-09-10 23:24:16 +00:00
std::unique_lock lock(write_mutex);
2020-05-29 15:02:12 +00:00
writeIntBinary(WAL_VERSION, *out);
2020-10-29 16:18:25 +00:00
ActionMetadata metadata{};
metadata.write(*out);
2020-05-29 15:02:12 +00:00
writeIntBinary(static_cast<UInt8>(ActionType::DROP_PART), *out);
writeStringBinary(part_name, *out);
2020-09-09 13:32:50 +00:00
out->next();
2020-05-29 15:02:12 +00:00
}
2020-09-10 23:24:16 +00:00
void MergeTreeWriteAheadLog::rotate(const std::unique_lock<std::mutex> &)
2020-04-14 19:47:19 +00:00
{
String new_name = String(WAL_FILE_NAME) + "_"
+ toString(min_block_number) + "_"
+ toString(max_block_number) + WAL_FILE_EXTENSION;
2022-02-03 09:14:00 +00:00
/// Finalize stream before file rename
out->finalize();
2020-05-14 20:08:15 +00:00
disk->replaceFile(path, storage.getRelativeDataPath() + new_name);
init();
2020-04-14 19:47:19 +00:00
}
/// Replays the WAL file at `path` and returns the in-memory parts recorded in it.
///
/// Each record consists of a version byte, an ActionMetadata blob (only when version > 0),
/// an action type and a part name. ADD_PART records are followed by the part's block,
/// read through NativeReader; DROP_PART records only remember the name so the part can be
/// filtered out of the result.
///
/// @param metadata_snapshot  table metadata used to build each restored part.
/// @param context            passed to partition creation and projection calculation.
/// @param parts_lock         lock passed through to storage.getActiveContainingPart().
/// @param readonly           when true, the WAL file is never removed by this function.
/// @return parts added by the log that were not dropped by a later/earlier DROP_PART record
///         and are not covered by an already active part.
///
/// If a record cannot be read (CANNOT_READ_ALL_DATA, UNKNOWN_FORMAT_VERSION,
/// BAD_DATA_PART_NAME, CORRUPTED_DATA) the file is treated as broken: replay stops, and the
/// file is removed (nothing restored yet, not readonly) or rotated (default WAL with restored
/// parts). Any other exception is rethrown.
MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(
    const StorageMetadataPtr & metadata_snapshot,
    ContextPtr context,
    std::unique_lock<std::mutex> & parts_lock,
    bool readonly)
{
    std::unique_lock lock(write_mutex);

    MergeTreeData::MutableDataPartsVector parts;
    auto in = disk->readFile(path);
    NativeReader block_in(*in, 0);
    NameSet dropped_parts;

    /// Set when the broken-file handler below has already removed `path`,
    /// so that the final cleanup does not try to remove the same file twice.
    bool wal_file_removed = false;

    while (!in->eof())
    {
        MergeTreeData::MutableDataPartPtr part;
        UInt8 version;
        String part_name;
        Block block;
        ActionType action_type;

        try
        {
            ActionMetadata metadata;

            readIntBinary(version, *in);
            /// Version 0 records carry no metadata blob.
            if (version > 0)
            {
                metadata.read(*in);
            }

            readIntBinary(action_type, *in);
            readStringBinary(part_name, *in);

            if (action_type == ActionType::DROP_PART)
            {
                dropped_parts.insert(part_name);
            }
            else if (action_type == ActionType::ADD_PART)
            {
                auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, disk, 0);

                part = storage.getDataPartBuilder(part_name, single_disk_volume, part_name)
                    .withPartType(MergeTreeDataPartType::InMemory)
                    .withPartStorageType(MergeTreeDataPartStorageType::Full)
                    .build();

                part->uuid = metadata.part_uuid;

                /// The block must be consumed even if the part is skipped below,
                /// otherwise the stream would not be positioned at the next record.
                block = block_in.read();

                if (storage.getActiveContainingPart(part->info, MergeTreeDataPartState::Active, parts_lock))
                    continue;
            }
            else
            {
                throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown action type: {}", toString(static_cast<UInt8>(action_type)));
            }
        }
        catch (const Exception & e)
        {
            if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA
                || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION
                || e.code() == ErrorCodes::BAD_DATA_PART_NAME
                || e.code() == ErrorCodes::CORRUPTED_DATA)
            {
                LOG_WARNING(log, "WAL file '{}' is broken. {}", path, e.displayText());

                /// If file is broken, do not write new parts to it.
                /// But if it contains any part rotate and save them.
                if (max_block_number == -1)
                {
                    if (!readonly)
                    {
                        disk->removeFile(path);
                        wal_file_removed = true;
                    }
                }
                else if (name == DEFAULT_WAL_FILE_NAME)
                    rotate(lock);

                break;
            }
            throw;
        }

        if (action_type == ActionType::ADD_PART)
        {
            MergedBlockOutputStream part_out(
                part,
                metadata_snapshot,
                block.getNamesAndTypesList(),
                {},
                CompressionCodecFactory::instance().get("NONE", {}),
                NO_TRANSACTION_PTR);

            part->minmax_idx->update(block, storage.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()));
            part->partition.create(metadata_snapshot, block, 0, context);
            part->setColumns(block.getNamesAndTypesList(), {}, metadata_snapshot->getMetadataVersion());

            if (metadata_snapshot->hasSortingKey())
                metadata_snapshot->getSortingKey().expression->execute(block);

            part_out.write(block);

            for (const auto & projection : metadata_snapshot->getProjections())
            {
                auto projection_block = projection.calculate(block, context);
                auto temp_part = MergeTreeDataWriter::writeProjectionPart(storage, log, projection_block, projection, part.get());
                temp_part.finalize();
                if (projection_block.rows())
                    part->addProjectionPart(projection.name, std::move(temp_part.part));
            }

            part_out.finalizePart(part, false);

            /// Track the block range covered by this file; rotate() uses it for the new file name.
            min_block_number = std::min(min_block_number, part->info.min_block);
            max_block_number = std::max(max_block_number, part->info.max_block);
            parts.push_back(std::move(part));
        }
    }

    /// Parts with a DROP_PART record anywhere in the file are not returned.
    MergeTreeData::MutableDataPartsVector result;
    std::copy_if(parts.begin(), parts.end(), std::back_inserter(result),
        [&dropped_parts](const auto & part) { return dropped_parts.count(part->name) == 0; });

    /// All parts in WAL had been already committed into the disk -> clear the WAL
    if (!readonly && result.empty())
    {
        /// A broken file with no restorable parts was already removed in the handler above;
        /// removing it again would fail because the file no longer exists.
        if (!wal_file_removed)
        {
            LOG_DEBUG(log, "WAL file '{}' had been completely processed. Removing.", path);
            disk->removeFile(path);
        }

        /// Reopen a fresh file so that `out` no longer refers to the removed one.
        init();
        return {};
    }

    return result;
}
2022-06-04 23:05:15 +00:00
void MergeTreeWriteAheadLog::shutdown()
{
2022-06-05 14:15:11 +00:00
{
std::unique_lock lock(write_mutex);
2022-06-08 11:28:33 +00:00
if (shutdown_called)
2022-06-05 14:15:11 +00:00
return;
2022-06-04 23:05:15 +00:00
2022-06-05 14:15:11 +00:00
if (sync_scheduled)
sync_cv.wait(lock, [this] { return !sync_scheduled; });
2022-06-08 11:28:33 +00:00
shutdown_called = true;
2022-06-05 14:15:11 +00:00
out->finalize();
out.reset();
}
2022-06-04 23:05:15 +00:00
2022-06-08 11:28:33 +00:00
/// Do it without lock, otherwise inversion between pool lock and write_mutex is possible
2022-06-04 23:05:15 +00:00
sync_task->deactivate();
}
/// Parses a file name of the form `<WAL_FILE_NAME>_<min_block>_<max_block>...`.
/// Returns the (min_block, max_block) pair, or an empty optional when the name does not
/// start with that pattern. Whatever follows the second number is not inspected.
std::optional<MergeTreeWriteAheadLog::MinMaxBlockNumber>
MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename)
{
    ReadBufferFromString in(filename);

    if (!checkString(WAL_FILE_NAME, in))
        return {};

    Int64 min_block;
    if (!checkChar('_', in) || !tryReadIntText(min_block, in))
        return {};

    Int64 max_block;
    if (!checkChar('_', in) || !tryReadIntText(max_block, in))
        return {};

    return std::make_pair(min_block, max_block);
}
2020-11-02 22:36:32 +00:00
/// Serializes the metadata to a JSON object string.
/// The part UUID is written under JSON_KEY_PART_UUID only when it is not the nil UUID.
String MergeTreeWriteAheadLog::ActionMetadata::toJSON() const
{
    Poco::JSON::Object object;

    const bool has_uuid = part_uuid != UUIDHelpers::Nil;
    if (has_uuid)
        object.set(JSON_KEY_PART_UUID, toString(part_uuid));

    std::ostringstream serialized; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    /// Make stream failures throw instead of silently producing truncated output.
    serialized.exceptions(std::ios::failbit);
    object.stringify(serialized);

    return serialized.str();
}
void MergeTreeWriteAheadLog::ActionMetadata::fromJSON(const String & buf)
{
Poco::JSON::Parser parser;
auto json = parser.parse(buf).extract<Poco::JSON::Object::Ptr>();
if (json->has(JSON_KEY_PART_UUID))
part_uuid = parseFromString<UUID>(json->getValue<std::string>(JSON_KEY_PART_UUID));
}
/// Reads the metadata blob from `meta_in`: the minimal compatible WAL version,
/// a varint payload size and, when the size is non-zero, that many bytes of JSON.
/// Throws UNKNOWN_FORMAT_VERSION when the stored minimal compatible version is
/// greater than WAL_VERSION.
void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in)
{
    readIntBinary(min_compatible_version, meta_in);
    if (min_compatible_version > WAL_VERSION)
        throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION,
            "WAL metadata version {} is not compatible with this ClickHouse version",
            toString(min_compatible_version));

    size_t json_size;
    readVarUInt(json_size, meta_in);

    /// NOTE(review): json_size comes straight from the file and is not bounded
    /// before the allocation below — confirm whether a sanity limit is wanted.
    if (json_size != 0)
    {
        String json_text(json_size, ' ');
        meta_in.readStrict(json_text.data(), json_size);
        fromJSON(json_text);
    }
}
/// Writes the metadata blob to `meta_out`: the minimal compatible version,
/// the JSON payload length as a varint (narrowed to UInt32), then the JSON payload.
void MergeTreeWriteAheadLog::ActionMetadata::write(WriteBuffer & meta_out) const
{
    writeIntBinary(min_compatible_version, meta_out);

    const String payload = toJSON();
    const auto payload_size = static_cast<UInt32>(payload.length());

    writeVarUInt(payload_size, meta_out);
    writeString(payload, meta_out);
}
2020-11-02 22:36:32 +00:00
2020-04-14 19:47:19 +00:00
}