add comments [#CLICKHOUSE-3000]

This commit is contained in:
Alexey Zatelepin 2017-09-13 19:22:04 +03:00 committed by alexey-milovidov
parent 61f0b32757
commit f1a8b9bfa1
4 changed files with 20 additions and 7 deletions

View File

@ -46,10 +46,10 @@ namespace ErrorCodes
/// The date column is specified. For each part min and max dates are remembered.
/// Essentially it is an index too.
///
/// Data is partitioned by month. Parts belonging to different months are not merged - for the ease of
/// administration (data sync and backup).
/// Data is partitioned by the value of the partitioning expression.
/// Parts belonging to different partitions are not merged - for the ease of administration (data sync and backup).
///
/// File structure:
/// File structure of old-style month-partitioned tables (format_version = 0):
/// Part directory - / min-date _ max-date _ min-id _ max-id _ level /
/// Inside the part directory:
/// checksums.txt - contains the list of all files along with their sizes and checksums.
@ -58,6 +58,13 @@ namespace ErrorCodes
/// [Column].bin - contains compressed column data.
/// [Column].mrk - marks, pointing to seek positions allowing to skip n * k rows.
///
/// File structure of tables with custom partitioning (format_version >= 1):
/// Part directory - / partition-id _ min-id _ max-id _ level /
/// Inside the part directory:
/// The same files as for month-partitioned tables, plus
/// partition.dat - contains the value of the partitioning expression
/// minmax_[Column].idx - MinMax indexes (see MergeTreeDataPart::MinMaxIndex class) for the columns required by the partitioning expression.
///
/// Several modes are implemented. Modes determine additional actions during merge:
/// - Ordinary - don't do anything special
/// - Collapsing - collapse pairs of rows with the opposite values of sign_columns for the same values

View File

@ -1,5 +1,6 @@
#pragma once
#include <Core/Types.h>
#include <common/strong_typedef.h>
namespace DB

View File

@ -19,17 +19,18 @@ static ReadBufferFromFile openForReading(const String & path)
return ReadBufferFromFile(path, std::min(static_cast<Poco::File::FileSize>(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize()));
}
/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system.
/// So if you want to change this method, be sure to guarantee compatibility with existing table data.
String MergeTreePartition::getID(const MergeTreeData & storage) const
{
if (value.size() != storage.partition_expr_columns.size())
throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR);
if (value.empty())
return "all";
return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK.
/// In case all partition fields are represented by integral types, try to produce a human-readable partition id.
/// In case all partition fields are represented by integral types, try to produce a human-readable ID.
/// Otherwise use a hex-encoded hash.
bool are_all_integral = true;
for (const Field & field : value)
{
@ -51,9 +52,12 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const
result += '-';
if (typeid_cast<const DataTypeDate *>(storage.partition_expr_column_types[i].get()))
result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].get<UInt64>())));
result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet<UInt64>())));
else
result += applyVisitor(to_string_visitor, value[i]);
/// It is tempting to output DateTime as YYYYMMDDhhmmss, but that would make partition ID
/// timezone-dependent.
}
return result;

View File

@ -10,6 +10,7 @@ namespace DB
class MergeTreeData;
struct MergeTreeDataPartChecksums;
/// This class represents a partition value of a single part and encapsulates its loading/storing logic.
struct MergeTreePartition
{
Row value;