Add doc and fix fasttests

This commit is contained in:
MikhailBurdukov 2024-09-09 08:13:38 +00:00
parent af340681d5
commit 690f6143ed
11 changed files with 25 additions and 14 deletions

View File

@ -777,6 +777,7 @@ Storage policies configuration markup:
<!-- more volumes -->
</volumes>
<move_factor>0.2</move_factor>
<move_policy>by_part_size</move_policy>
</policy_name_1>
<policy_name_2>
<!-- configuration -->
@ -794,7 +795,8 @@ Tags:
- `volume_name_N` — Volume name. Volume names must be unique.
- `disk` — a disk within a volume.
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume's disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically, this feature allows keeping new/small parts on a hot (SSD) volume and moving them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move to the next volume, if any (by default, 0.1). Parts to move are chosen according to `move_policy`; if the total size of all parts is insufficient to meet the condition, all parts will be moved.
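- `move_policy` — Policy for selecting which parts to move to the next volume so that their total size is sufficient to meet the `move_factor` condition. Possible values: `by_part_size` (parts are taken from largest to smallest) or `by_insert_data_time` (parts with the oldest inserted data are taken first).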
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). If disabled then already expired data part is written into a default volume and then right after moved to TTL volume.
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
- `least_used_ttl_ms` - Configure timeout (in milliseconds) for updating the available space on all disks (`0` - update always, `-1` - never update, default is `60000`). Note, if the disk can be used by ClickHouse only and is not subject to an online filesystem resize/shrink, you can use `-1`; in all other cases it is not recommended, since eventually it will lead to incorrect space distribution.

View File

@ -51,6 +51,10 @@ Columns:
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `min_time_of_data_insert` ([DateTime](../../sql-reference/data-types/datetime.md)) The minimum time of when data was inserted into this part.
- `max_time_of_data_insert` ([DateTime](../../sql-reference/data-types/datetime.md)) The maximum time of when data was inserted into this part.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The time when the data part became inactive.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
@ -136,6 +140,8 @@ secondary_indices_uncompressed_bytes: 6
secondary_indices_marks_bytes: 48
marks_bytes: 144
modification_time: 2020-06-18 13:01:49
min_time_of_data_insert: 2020-06-18 13:01:49
max_time_of_data_insert: 2020-06-18 13:01:49
remove_time: 1970-01-01 00:00:00
refcount: 1
min_date: 1970-01-01

View File

@ -17,6 +17,9 @@ Columns:
- `UNKNOWN`
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
- `move_policy` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for selecting which parts to move to the next volume so that their total size is sufficient to meet the `move_factor` condition. Can have one of the following values:
- `BY_PART_SIZE` - sorts existing parts by size from largest to smallest (in descending order).
- `BY_INSERT_DATA_TIME` - sorts existing parts by the time data was inserted into the part (older parts first).
- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. Should always be false. If this setting is enabled, you made a mistake.
- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values:

View File

@ -7431,9 +7431,6 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime();
// dst_data_part->min_time_of_data_insert = src_part->getMinTimeOfDataInsertion();
// dst_data_part->max_time_of_data_insert = src_part->getMaxTimeOfDataInsertion();
return std::make_pair(dst_data_part, std::move(temporary_directory_lock));
}

View File

@ -62,8 +62,8 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_)
{"secondary_indices_uncompressed_bytes", std::make_shared<DataTypeUInt64>(), "Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included."},
{"secondary_indices_marks_bytes", std::make_shared<DataTypeUInt64>(), "The size of the file with marks for secondary indices."},
{"modification_time", std::make_shared<DataTypeDateTime>(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."},
{"min_time_of_data_insert", std::make_shared<DataTypeDateTime>(), "min_time_of_data_insert."},
{"max_time_of_data_insert", std::make_shared<DataTypeDateTime>(), "max_time_of_data_insert."},
{"min_time_of_data_insert", std::make_shared<DataTypeDateTime>(), "The minimum time of when data was inserted into this part."},
{"max_time_of_data_insert", std::make_shared<DataTypeDateTime>(), "The maximum time of when data was inserted into this part."},
{"remove_time", std::make_shared<DataTypeDateTime>(), "The time when the data part became inactive."},
{"refcount", std::make_shared<DataTypeUInt32>(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."},
{"min_date", std::make_shared<DataTypeDate>(), "The minimum value of the date key in the data part."},

View File

@ -44,7 +44,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab
{"volume_type", std::make_shared<DataTypeEnum8>(getTypeEnumValues<VolumeType>()), "The type of the volume - JBOD or a single disk."},
{"max_data_part_size", std::make_shared<DataTypeUInt64>(), "the maximum size of a part that can be stored on any of the volumes disks."},
{"move_factor", std::make_shared<DataTypeFloat32>(), "When the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1)."},
{"move_policy", std::make_shared<DataTypeEnum8>(getTypeEnumValues<IStoragePolicy::MovePolicy>())},
{"move_policy", std::make_shared<DataTypeEnum8>(getTypeEnumValues<IStoragePolicy::MovePolicy>()), "Policy for selecting parts for move to the next volume, `by_part_size` or `by_insert_data_time`."},
{"prefer_not_to_merge", std::make_shared<DataTypeUInt8>(), "You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation)."},
{"perform_ttl_move_on_insert", std::make_shared<DataTypeUInt8>(), "Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule."},
{"load_balancing", std::make_shared<DataTypeEnum8>(getTypeEnumValues<VolumeLoadBalancing>()), "Policy for disk balancing, `round_robin` or `least_used`."}

View File

@ -1 +1 @@
20000101_1_1_0 test_00961 e4ed027389c208d2b5fce9c4ef1ca42c 4c23d7f5920f89aefc3b062b646cd23d 908ddf2b1d0af239da96ff1e527a8a1f
20000101_1_1_0 test_00961 812036551f93a3685116fa6169d36fa9 2ca3d19eefed0ce7b4e7627e443b9a59 908ddf2b1d0af239da96ff1e527a8a1f

View File

@ -471,6 +471,8 @@ CREATE TABLE system.parts
`secondary_indices_uncompressed_bytes` UInt64,
`secondary_indices_marks_bytes` UInt64,
`modification_time` DateTime,
`min_time_of_data_insert` DateTime,
`max_time_of_data_insert` DateTime,
`remove_time` DateTime,
`refcount` UInt32,
`min_date` Date,
@ -1056,6 +1058,7 @@ CREATE TABLE system.storage_policies
`volume_type` Enum8('JBOD' = 0, 'SINGLE_DISK' = 1, 'UNKNOWN' = 2),
`max_data_part_size` UInt64,
`move_factor` Float32,
`move_policy` Enum8('BY_PART_SIZE' = 0, 'BY_INSERT_DATA_TIME' = 1),
`prefer_not_to_merge` UInt8,
`perform_ttl_move_on_insert` UInt8,
`load_balancing` Enum8('ROUND_ROBIN' = 0, 'LEAST_USED' = 1)

View File

@ -5,4 +5,4 @@
0
1
0
0_0_0_0 Wide 370db59d5dcaef5d762b11d319c368c7 514a8be2dac94fd039dbd230065e58a4 b324ada5cd6bb14402c1e59200bd003a
0_0_0_0 Wide 3f2fc7b294c1676dcdc3a7c830228aa9 645e5774e2921598cd105516a04c6a8b b324ada5cd6bb14402c1e59200bd003a

View File

@ -46,7 +46,7 @@ for i in {1..100}; do
# Non retriable errors
if [[ $FileSync -ne 8 ]]; then
echo "FileSync: $FileSync != 8" >&2
echo "FileSync: $FileSync != 9" >&2
exit 2
fi
# Check that all files was synced

View File

@ -1,13 +1,13 @@
1000 10000
1000 10000
test_02381 2000000 16112790 11904 16100886
test_02381_compress 2000000 16099626 1658 16097968
test_02381 2000000 16112832 11904 16100928
test_02381_compress 2000000 16099668 1658 16098010
10000 100000
10000 100000
10000 100000
10000 100000
test_02381 4000000 28098334 2946 28095388
test_02381_compress 4000000 28125412 23616 28101796
test_02381 4000000 28098376 2946 28095430
test_02381_compress 4000000 28125454 23616 28101838
1 Hello
2 World
1 Hello