From ad787938f5fa247901da7003c5717fed6a838445 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 Jul 2019 22:43:33 +0300 Subject: [PATCH] better detached part name parsing --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 17 +------------- .../Storages/MergeTree/MergeTreePartInfo.cpp | 22 +++++++++++++++++++ .../Storages/MergeTree/MergeTreePartInfo.h | 6 +++++ .../System/StorageSystemDetachedParts.cpp | 1 + 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b32470f9f77..6a7b6d5405e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2579,22 +2579,7 @@ MergeTreeData::getDetachedParts() const res.emplace_back(); auto & part = res.back(); - /// First, try to parse as . - if (MergeTreePartInfo::tryParsePartName(dir_name, &part, format_version)) - continue; - - /// Next, as _. Use entire name as prefix if it fails. - part.prefix = dir_name; - const auto first_separator = dir_name.find_first_of('_'); - if (first_separator == String::npos) - continue; - - const auto part_name = dir_name.substr(first_separator + 1, - dir_name.size() - first_separator - 1); - if (!MergeTreePartInfo::tryParsePartName(part_name, &part, format_version)) - continue; - - part.prefix = dir_name.substr(0, first_separator); + DetachedPartInfo::tryParseDetachedPartName(dir_name, &part, format_version); } return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 19f77448110..732cc3436f4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -188,4 +188,26 @@ String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) con return wb.str(); } +bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, + MergeTreeDataFormatVersion format_version) +{ + /// First, try to parse as . + if (MergeTreePartInfo::tryParsePartName(dir_name, part_info, format_version)) + return part_info->valid_name = true; + + /// Next, as _. Use entire name as prefix if it fails. + part_info->prefix = dir_name; + const auto first_separator = dir_name.find_first_of('_'); + if (first_separator == String::npos) + return part_info->valid_name = false; + + // TODO what if contains '_'? + const auto part_name = dir_name.substr(first_separator + 1, + dir_name.size() - first_separator - 1); + if (!MergeTreePartInfo::tryParsePartName(part_name, part_info, format_version)) + return part_info->valid_name = false; + + part_info->prefix = dir_name.substr(0, first_separator); + return part_info->valid_name = true; +} } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index e80664c3dd9..2a168086a1c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -93,6 +93,12 @@ struct MergeTreePartInfo struct DetachedPartInfo : public MergeTreePartInfo { String prefix; + + /// If false, prefix contains full directory name and MergeTreePartInfo may be in invalid state + /// (directory name was not successfully parsed). + bool valid_name; + + static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, MergeTreeDataFormatVersion format_version); }; } diff --git a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp index 9ae6f7b607a..9f33a60b84a 100644 --- a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp @@ -28,6 +28,7 @@ public: protected: explicit StorageSystemDetachedParts() { + // TODO add column "directory_name" or "is_valid_name" setColumns(ColumnsDescription{{ {"database", std::make_shared()}, {"table", std::make_shared()},