Merge pull request #39181 from azat/part_log-merge_algorithm

Add merge_algorithm to system.part_log
This commit is contained in:
Yakov Olkhovskiy 2022-07-14 11:43:04 -04:00 committed by GitHub
commit 1f09303d54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 96 additions and 6 deletions

View File

@ -19,6 +19,10 @@ The `system.part_log` table contains the following columns:
- `REGULAR_MERGE` — Some regular merge.
- `TTL_DELETE_MERGE` — Cleaning up expired data.
- `TTL_RECOMPRESS_MERGE` — Recompressing a data part.
- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Merge algorithm for the event with type `MERGE_PARTS`. Can have one of the following values:
- `UNDECIDED`
- `HORIZONTAL`
- `VERTICAL`
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
@ -52,6 +56,7 @@ Row 1:
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
merge_reason: NotAMerge
merge_algorithm: Undecided
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
event_time_microseconds: 2021-02-02 11:14:28.861919

View File

@ -14,6 +14,15 @@
- `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- `MUTATE_PART` — изменение куска.
- `MOVE_PART` — перемещение куска между дисками.
- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — Причина события с типом `MERGE_PARTS`. Может принимать одно из следующих значений:
- `NOT_A_MERGE` — событие имеет тип иной, чем `MERGE_PARTS`.
- `REGULAR_MERGE` — обычное слияние.
- `TTL_DELETE_MERGE` — очистка истекших данных.
- `TTL_RECOMPRESS_MERGE` — переупаковка куска.
- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Алгоритм слияния для события с типом `MERGE_PARTS`. Может принимать одно из следующих значений:
- `UNDECIDED`
- `HORIZONTAL`
- `VERTICAL`
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события с точностью до микросекунд.
@ -46,6 +55,8 @@ Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
merge_reason: NotAMerge
merge_algorithm: Undecided
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
event_time_microseconds: 2021-02-02 11:14:28.861919

View File

@ -25,17 +25,32 @@ PartLogElement::MergeReasonType PartLogElement::getMergeReasonType(MergeType mer
{
switch (merge_type)
{
case MergeType::Regular:
return REGULAR_MERGE;
case MergeType::TTLDelete:
return TTL_DELETE_MERGE;
case MergeType::TTLRecompress:
return TTL_RECOMPRESS_MERGE;
case MergeType::Regular:
return REGULAR_MERGE;
case MergeType::TTLDelete:
return TTL_DELETE_MERGE;
case MergeType::TTLRecompress:
return TTL_RECOMPRESS_MERGE;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
}
/// Maps a MergeAlgorithm value to the matching PartLogElement::PartMergeAlgorithm value.
/// Throws an Exception with code NOT_IMPLEMENTED when the argument matches none of the
/// handled enumerators.
PartLogElement::PartMergeAlgorithm PartLogElement::getMergeAlgorithm(MergeAlgorithm merge_algorithm_)
{
    /// The switch has no `default` label: a value that matches no case leaves the switch
    /// and reaches the throw below.
    switch (merge_algorithm_)
    {
        case MergeAlgorithm::Undecided:  return UNDECIDED;
        case MergeAlgorithm::Horizontal: return HORIZONTAL;
        case MergeAlgorithm::Vertical:   return VERTICAL;
    }

    const auto raw_value = static_cast<UInt64>(merge_algorithm_);
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", raw_value);
}
NamesAndTypesList PartLogElement::getNamesAndTypes()
{
auto event_type_datatype = std::make_shared<DataTypeEnum8>(
@ -60,12 +75,22 @@ NamesAndTypesList PartLogElement::getNamesAndTypes()
}
);
auto merge_algorithm_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
{
{"Undecided", static_cast<Int8>(UNDECIDED)},
{"Horizontal", static_cast<Int8>(HORIZONTAL)},
{"Vertical", static_cast<Int8>(VERTICAL)},
}
);
ColumnsWithTypeAndName columns_with_type_and_name;
return {
{"query_id", std::make_shared<DataTypeString>()},
{"event_type", std::move(event_type_datatype)},
{"merge_reason", std::move(merge_reason_datatype)},
{"merge_algorithm", std::move(merge_algorithm_datatype)},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
@ -104,6 +129,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(query_id);
columns[i++]->insert(event_type);
columns[i++]->insert(merge_reason);
columns[i++]->insert(merge_algorithm);
columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType());
columns[i++]->insert(event_time);
columns[i++]->insert(event_time_microseconds);

View File

@ -5,6 +5,7 @@
#include <Core/NamesAndTypes.h>
#include <Core/NamesAndAliases.h>
#include <Storages/MergeTree/MergeType.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
namespace DB
@ -22,6 +23,14 @@ struct PartLogElement
MOVE_PART = 6,
};
/// Copy of MergeAlgorithm since values are written to disk.
/// The numeric values are spelled out explicitly; they are the values registered for the
/// `merge_algorithm` Enum8 column in getNamesAndTypes().
/// NOTE(review): VERTICAL (1) is numbered before HORIZONTAL (2); presumably the numbers must
/// stay stable once rows have been written — confirm before renumbering.
enum PartMergeAlgorithm
{
UNDECIDED = 0,
VERTICAL = 1,
HORIZONTAL = 2,
};
enum MergeReasonType
{
/// merge_reason is relevant only for event_type = 'MERGE_PARTS', in other cases it is NOT_A_MERGE
@ -38,6 +47,7 @@ struct PartLogElement
Type event_type = NEW_PART;
MergeReasonType merge_reason = NOT_A_MERGE;
PartMergeAlgorithm merge_algorithm = UNDECIDED;
time_t event_time = 0;
Decimal64 event_time_microseconds = 0;
@ -72,6 +82,8 @@ struct PartLogElement
static std::string name() { return "PartLog"; }
static MergeReasonType getMergeReasonType(MergeType merge_type);
static PartMergeAlgorithm getMergeAlgorithm(MergeAlgorithm merge_algorithm_);
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;

View File

@ -6229,8 +6229,13 @@ try
part_log_elem.event_type = type;
if (part_log_elem.event_type == PartLogElement::MERGE_PARTS)
{
if (merge_entry)
{
part_log_elem.merge_reason = PartLogElement::getMergeReasonType((*merge_entry)->merge_type);
part_log_elem.merge_algorithm = PartLogElement::getMergeAlgorithm((*merge_entry)->merge_algorithm);
}
}
part_log_elem.error = static_cast<UInt16>(execution_status.code);
part_log_elem.exception = execution_status.message;

View File

@ -0,0 +1,5 @@
data_horizontal all_1_1_0 NewPart Undecided
data_horizontal all_1_1_1 MergeParts Horizontal
data_vertical all_1_1_0 NewPart Undecided
data_vertical all_2_2_0 NewPart Undecided
data_vertical all_1_2_1 MergeParts Vertical

View File

@ -0,0 +1,26 @@
-- Checks the merge_algorithm column of system.part_log for two MergeTree tables.
-- Case 1: a table with a single column; the test's reference output expects `Horizontal`
-- for its MergeParts event.
CREATE TABLE data_horizontal (
key Int
)
Engine=MergeTree()
ORDER BY key;
INSERT INTO data_horizontal VALUES (1);
OPTIMIZE TABLE data_horizontal FINAL;
-- Flush logs before reading system.part_log so the events above can be queried.
SYSTEM FLUSH LOGS;
-- Filter by the current database and table name; order by event time so that the rows
-- come out in the order the events happened.
SELECT table, part_name, event_type, merge_algorithm FROM system.part_log WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'data_horizontal' ORDER BY event_time_microseconds;
-- Case 2: a two-column table; the test's reference output expects `Vertical` for its
-- MergeParts event.
-- NOTE(review): the settings below presumably make the merge eligible for the vertical
-- algorithm (activation thresholds of 1 row / 1 column) — confirm against the MergeTree
-- settings documentation.
CREATE TABLE data_vertical
(
key UInt64,
value String
)
ENGINE = MergeTree()
ORDER BY key
SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0, min_bytes_for_wide_part = 0,
vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1;
-- Two separate inserts, so that OPTIMIZE has two parts to merge.
INSERT INTO data_vertical VALUES (1, '1');
INSERT INTO data_vertical VALUES (2, '2');
OPTIMIZE TABLE data_vertical FINAL;
SYSTEM FLUSH LOGS;
SELECT table, part_name, event_type, merge_algorithm FROM system.part_log WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'data_vertical' ORDER BY event_time_microseconds;