2017-08-14 18:16:11 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
|
|
|
#include <IO/ReadBufferFromString.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2023-02-03 13:34:18 +00:00
|
|
|
#include "Core/ProtocolDefines.h"
|
2017-08-14 18:16:11 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int BAD_DATA_PART_NAME;
|
2021-08-06 18:03:38 +00:00
|
|
|
extern const int INVALID_PARTITION_VALUE;
|
2023-02-03 13:34:18 +00:00
|
|
|
extern const int UNKNOWN_FORMAT_VERSION;
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-09 02:31:05 +00:00
|
|
|
/// Parses `part_name` for the given `format_version` and returns the resulting part info.
/// Delegates to tryParsePartName(); throws BAD_DATA_PART_NAME if that call yields no value.
MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version)
{
    auto parsed = tryParsePartName(part_name, format_version);

    if (!parsed)
        throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {} for format version: {}", part_name, format_version);

    return *parsed;
}
|
|
|
|
|
2021-08-06 18:03:38 +00:00
|
|
|
void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
|
2021-07-29 13:11:05 +00:00
|
|
|
{
|
|
|
|
if (partition_id.empty())
|
2021-08-06 18:03:38 +00:00
|
|
|
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition id is empty");
|
2021-07-29 13:11:05 +00:00
|
|
|
|
|
|
|
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
|
|
|
|
{
|
2021-08-06 18:03:38 +00:00
|
|
|
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
|
|
|
|
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
|
|
|
|
"Invalid partition format: {}. Partition should consist of 6 digits: YYYYMM",
|
|
|
|
partition_id);
|
2021-07-29 13:11:05 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-08-06 18:03:38 +00:00
|
|
|
auto is_valid_char = [](char c) { return c == '-' || isAlphaNumericASCII(c); };
|
|
|
|
if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
|
|
|
|
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Invalid partition format: {}", partition_id);
|
2021-07-29 13:11:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-08-24 12:57:49 +00:00
|
|
|
std::optional<MergeTreePartInfo> MergeTreePartInfo::tryParsePartName(
|
|
|
|
std::string_view part_name, MergeTreeDataFormatVersion format_version)
|
2017-08-14 18:16:11 +00:00
|
|
|
{
|
2020-03-09 02:31:05 +00:00
|
|
|
ReadBufferFromString in(part_name);
|
2017-08-25 20:41:45 +00:00
|
|
|
|
|
|
|
String partition_id;
|
2021-08-24 12:57:49 +00:00
|
|
|
|
2017-09-07 16:21:06 +00:00
|
|
|
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
|
2017-08-25 20:41:45 +00:00
|
|
|
{
|
|
|
|
UInt32 min_yyyymmdd = 0;
|
|
|
|
UInt32 max_yyyymmdd = 0;
|
2021-08-24 12:57:49 +00:00
|
|
|
|
2017-08-25 20:41:45 +00:00
|
|
|
if (!tryReadIntText(min_yyyymmdd, in)
|
|
|
|
|| !checkChar('_', in)
|
|
|
|
|| !tryReadIntText(max_yyyymmdd, in)
|
|
|
|
|| !checkChar('_', in))
|
|
|
|
{
|
2021-08-24 12:57:49 +00:00
|
|
|
return std::nullopt;
|
2017-08-25 20:41:45 +00:00
|
|
|
}
|
2021-08-24 12:57:49 +00:00
|
|
|
|
2017-08-25 20:41:45 +00:00
|
|
|
partition_id = toString(min_yyyymmdd / 100);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (!in.eof())
|
|
|
|
{
|
|
|
|
char c;
|
|
|
|
readChar(c, in);
|
|
|
|
if (c == '_')
|
|
|
|
break;
|
|
|
|
|
|
|
|
partition_id.push_back(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-20 16:24:36 +00:00
|
|
|
/// Sanity check
|
|
|
|
if (partition_id.empty())
|
2021-08-24 12:57:49 +00:00
|
|
|
return std::nullopt;
|
2019-05-20 16:24:36 +00:00
|
|
|
|
2017-08-14 18:16:11 +00:00
|
|
|
Int64 min_block_num = 0;
|
|
|
|
Int64 max_block_num = 0;
|
|
|
|
UInt32 level = 0;
|
2018-05-13 00:24:23 +00:00
|
|
|
UInt32 mutation = 0;
|
2017-08-14 18:16:11 +00:00
|
|
|
|
2017-08-25 20:41:45 +00:00
|
|
|
if (!tryReadIntText(min_block_num, in)
|
2017-08-14 18:16:11 +00:00
|
|
|
|| !checkChar('_', in)
|
|
|
|
|| !tryReadIntText(max_block_num, in)
|
|
|
|
|| !checkChar('_', in)
|
2018-05-13 00:24:23 +00:00
|
|
|
|| !tryReadIntText(level, in))
|
2017-08-14 18:16:11 +00:00
|
|
|
{
|
2021-08-24 12:57:49 +00:00
|
|
|
return std::nullopt;
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
2019-05-20 16:24:36 +00:00
|
|
|
/// Sanity check
|
|
|
|
if (min_block_num > max_block_num)
|
2021-08-24 12:57:49 +00:00
|
|
|
return std::nullopt;
|
2019-05-20 16:24:36 +00:00
|
|
|
|
2018-05-13 00:24:23 +00:00
|
|
|
if (!in.eof())
|
|
|
|
{
|
|
|
|
if (!checkChar('_', in)
|
|
|
|
|| !tryReadIntText(mutation, in)
|
|
|
|
|| !in.eof())
|
|
|
|
{
|
2021-08-24 12:57:49 +00:00
|
|
|
return std::nullopt;
|
2018-05-13 00:24:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-24 12:57:49 +00:00
|
|
|
MergeTreePartInfo part_info;
|
|
|
|
|
|
|
|
part_info.partition_id = std::move(partition_id);
|
|
|
|
part_info.min_block = min_block_num;
|
|
|
|
part_info.max_block = max_block_num;
|
|
|
|
|
|
|
|
if (level == LEGACY_MAX_LEVEL)
|
2017-08-14 18:16:11 +00:00
|
|
|
{
|
2021-08-24 12:57:49 +00:00
|
|
|
/// We (accidentally) had two different max levels until 21.6 and it might cause logical errors like
|
|
|
|
/// "Part 20170601_20170630_0_2_999999999 intersects 201706_0_1_4294967295".
|
|
|
|
/// So we replace unexpected max level to make contains(...) method and comparison operators work
|
|
|
|
/// correctly with such virtual parts. On part name serialization we will use legacy max level to keep the name unchanged.
|
|
|
|
part_info.use_leagcy_max_level = true;
|
|
|
|
level = MAX_LEVEL;
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
2021-08-24 12:57:49 +00:00
|
|
|
part_info.level = level;
|
|
|
|
part_info.mutation = mutation;
|
|
|
|
|
|
|
|
return part_info;
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-09 02:31:05 +00:00
|
|
|
void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, DayNum & min_date, DayNum & max_date)
|
2017-08-14 18:16:11 +00:00
|
|
|
{
|
|
|
|
UInt32 min_yyyymmdd = 0;
|
|
|
|
UInt32 max_yyyymmdd = 0;
|
|
|
|
|
2020-03-09 02:31:05 +00:00
|
|
|
ReadBufferFromString in(part_name);
|
2017-08-14 18:16:11 +00:00
|
|
|
|
|
|
|
if (!tryReadIntText(min_yyyymmdd, in)
|
|
|
|
|| !checkChar('_', in)
|
|
|
|
|| !tryReadIntText(max_yyyymmdd, in))
|
|
|
|
{
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name);
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
const auto & date_lut = DateLUT::instance();
|
|
|
|
|
2017-08-15 20:03:59 +00:00
|
|
|
min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
|
|
|
|
max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
|
|
|
|
|
2020-04-12 22:25:41 +00:00
|
|
|
auto min_month = date_lut.toNumYYYYMM(min_date);
|
|
|
|
auto max_month = date_lut.toNumYYYYMM(max_date);
|
2017-08-14 18:16:11 +00:00
|
|
|
|
|
|
|
if (min_month != max_month)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Part name {} contains different months", part_name);
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-08-25 20:41:45 +00:00
|
|
|
bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version)
|
2017-08-14 18:16:11 +00:00
|
|
|
{
|
2017-08-25 20:41:45 +00:00
|
|
|
MergeTreePartInfo outer = fromPartName(outer_part_name, format_version);
|
|
|
|
MergeTreePartInfo inner = fromPartName(inner_part_name, format_version);
|
2017-08-14 18:16:11 +00:00
|
|
|
return outer.contains(inner);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-12-20 21:44:27 +00:00
|
|
|
/// Returns the part name in the V1 (custom partitioning) layout after checking that `format_version` supports it.
/// Throws BAD_DATA_PART_NAME for a format version older than MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.
String MergeTreePartInfo::getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const
{
    /// The rest of this file treats every version that is not below the minimum as the new format,
    /// so the same comparison is used here instead of an exact equality test.
    if (!(format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING))
        return getPartNameV1();

    /// We cannot just call getPartNameV0 because it requires extra arguments, but at least we can warn about it.
    chassert(false); /// Catch it in CI. Feel free to remove this line.
    throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Trying to get part name in new format for old format version. "
                    "Either some new feature is incompatible with deprecated *MergeTree definition syntax or it's a bug.");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Part name intended for log messages.
/// The format version is not taken into account: the V1 layout is always produced.
String MergeTreePartInfo::getPartNameForLogs() const
{
    String name_for_logs = getPartNameV1();
    return name_for_logs;
}
|
|
|
|
|
|
|
|
/// Builds the part name as `<partition_id>_<min_block>_<max_block>_<level>[_<mutation>]`.
/// The mutation suffix is written only when `mutation` is non-zero.
String MergeTreePartInfo::getPartNameV1() const
{
    WriteBufferFromOwnString name_buf;

    writeString(partition_id, name_buf);
    writeChar('_', name_buf);
    writeIntText(min_block, name_buf);
    writeChar('_', name_buf);
    writeIntText(max_block, name_buf);
    writeChar('_', name_buf);

    if (!use_leagcy_max_level)
    {
        writeIntText(level, name_buf);
    }
    else
    {
        /// The flag is set together with level = MAX_LEVEL; the legacy value is written back so the name stays unchanged.
        assert(level == MAX_LEVEL);
        writeIntText(LEGACY_MAX_LEVEL, name_buf);
    }

    if (mutation != 0)
    {
        writeChar('_', name_buf);
        writeIntText(mutation, name_buf);
    }

    return name_buf.str();
}
|
|
|
|
|
|
|
|
|
2018-05-25 13:29:15 +00:00
|
|
|
/// Builds the part name in the old layout from the given date range and this part's block numbers and level.
/// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`, followed by `_<mutation>` when `mutation` is non-zero.
String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) const
{
    const auto & date_lut = DateLUT::instance();

    const unsigned left_yyyymmdd = date_lut.toNumYYYYMMDD(left_date);
    const unsigned right_yyyymmdd = date_lut.toNumYYYYMMDD(right_date);

    WriteBufferFromOwnString name_buf;

    writeIntText(left_yyyymmdd, name_buf);
    writeChar('_', name_buf);
    writeIntText(right_yyyymmdd, name_buf);
    writeChar('_', name_buf);
    writeIntText(min_block, name_buf);
    writeChar('_', name_buf);
    writeIntText(max_block, name_buf);
    writeChar('_', name_buf);

    if (!use_leagcy_max_level)
    {
        writeIntText(level, name_buf);
    }
    else
    {
        /// The flag is set together with level = MAX_LEVEL; the legacy value is written back so the name stays unchanged.
        assert(level == MAX_LEVEL);
        writeIntText(LEGACY_MAX_LEVEL, name_buf);
    }

    if (mutation != 0)
    {
        writeChar('_', name_buf);
        writeIntText(mutation, name_buf);
    }

    return name_buf.str();
}
|
|
|
|
|
2023-02-03 13:34:18 +00:00
|
|
|
/// Writes this part info to `out`: the DBMS_MERGE_TREE_PART_INFO_VERSION marker first, then
/// partition_id, min_block, max_block, level and mutation in binary form, and finally
/// use_leagcy_max_level via writeBoolText. deserialize() reads the fields in the same order.
void MergeTreePartInfo::serialize(WriteBuffer & out) const
{
    /// Must be the first
    const UInt64 format_marker = DBMS_MERGE_TREE_PART_INFO_VERSION;
    writeIntBinary(format_marker, out);

    writeStringBinary(partition_id, out);

    writeIntBinary(min_block, out);
    writeIntBinary(max_block, out);

    writeIntBinary(level, out);
    writeIntBinary(mutation, out);

    writeBoolText(use_leagcy_max_level, out);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Textual description of this part info; currently identical to the V1 part name.
String MergeTreePartInfo::describe() const
{
    String description = getPartNameV1();
    return description;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Reads this part info from `in`, expecting the field order produced by serialize().
/// Throws UNKNOWN_FORMAT_VERSION if the leading version marker differs from DBMS_MERGE_TREE_PART_INFO_VERSION;
/// in that case no member has been modified yet.
void MergeTreePartInfo::deserialize(ReadBuffer & in)
{
    UInt64 stored_version = 0;
    readIntBinary(stored_version, in);

    const bool version_supported = stored_version == DBMS_MERGE_TREE_PART_INFO_VERSION;
    if (!version_supported)
        throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Version for MergeTreePart info mismatched. Got: {}, supported version: {}",
            stored_version, DBMS_MERGE_TREE_PART_INFO_VERSION);

    readStringBinary(partition_id, in);

    readIntBinary(min_block, in);
    readIntBinary(max_block, in);

    readIntBinary(level, in);
    readIntBinary(mutation, in);

    readBoolText(use_leagcy_max_level, in);
}
|
|
|
|
|
2021-08-24 12:57:49 +00:00
|
|
|
/// Interprets the name of a detached part directory and returns the filled DetachedPartInfo.
/// `disk` and `dir_name` are always stored in the result. The name is tried, in this order, as:
///  1. `<known_prefix>_<part_name>` for each entry of DETACH_REASONS;
///  2. `<part_name>` alone;
///  3. `<prefix>_<part_name>`, splitting at the first '_'.
/// `valid_name` tells whether a part name could be parsed; when step 3 fails the whole `dir_name` is kept as the prefix.
DetachedPartInfo DetachedPartInfo::parseDetachedPartName(
    const DiskPtr & disk, std::string_view dir_name, MergeTreeDataFormatVersion format_version)
{
    DetachedPartInfo part_info;

    part_info.disk = disk;
    part_info.dir_name = dir_name;

    /// First, try to find known prefix and parse dir_name as <prefix>_<part_name>.
    /// Arbitrary strings are not allowed for partition_id, so known_prefix cannot be confused with partition_id.
    for (std::string_view known_prefix : DETACH_REASONS)
    {
        const size_t prefix_length = known_prefix.size();

        const bool has_known_prefix = dir_name.starts_with(known_prefix)
            && prefix_length < dir_name.size()
            && dir_name[prefix_length] == '_';

        if (!has_known_prefix)
            continue;

        part_info.prefix = known_prefix;

        const std::string_view name_after_prefix = dir_name.substr(prefix_length + 1);

        auto parsed = MergeTreePartInfo::tryParsePartName(name_after_prefix, format_version);
        if (!parsed)
        {
            part_info.valid_name = false;
        }
        else
        {
            part_info.valid_name = true;
            part_info.addParsedPartInfo(*parsed);
        }

        /// A known prefix is decisive: the result is returned even if the rest is not a valid part name.
        return part_info;
    }

    /// Next, try to parse dir_name as <part_name>.
    if (auto parsed = MergeTreePartInfo::tryParsePartName(dir_name, format_version))
    {
        part_info.valid_name = true;
        part_info.addParsedPartInfo(*parsed);
        return part_info;
    }

    /// Next, as <prefix>_<partname>. Use entire name as prefix if it fails.
    part_info.prefix = dir_name;

    const size_t separator_pos = dir_name.find('_');

    if (separator_pos == std::string_view::npos)
    {
        part_info.valid_name = false;
        return part_info;
    }

    const std::string_view name_after_separator = dir_name.substr(separator_pos + 1);

    auto parsed = MergeTreePartInfo::tryParsePartName(name_after_separator, format_version);
    if (!parsed)
    {
        part_info.valid_name = false;
    }
    else
    {
        part_info.valid_name = true;
        part_info.prefix = dir_name.substr(0, separator_pos);
        part_info.addParsedPartInfo(*parsed);
    }

    // TODO what if name contains "_tryN" suffix?
    return part_info;
}
|
2019-07-25 10:46:07 +00:00
|
|
|
|
2021-08-24 12:57:49 +00:00
|
|
|
void DetachedPartInfo::addParsedPartInfo(const MergeTreePartInfo& part)
|
|
|
|
{
|
|
|
|
// Both class are aggregates so it's ok.
|
|
|
|
static_cast<MergeTreePartInfo &>(*this) = part;
|
|
|
|
}
|
2017-08-14 18:16:11 +00:00
|
|
|
}
|