add test for const adaptive granularity

This commit is contained in:
Anton Popov 2024-11-11 20:38:33 +00:00
parent aa51f4cc88
commit c5817d528c
16 changed files with 129 additions and 31 deletions

View File

@ -3,7 +3,6 @@
#include <base/types.h>
#include <exception>
#include <memory>
#include <optional>
#include <string_view>
#include <Compression/CompressedReadBuffer.h>
@ -30,6 +29,7 @@
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MergeTree/Backup.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <base/JSON.h>
#include <boost/algorithm/string/join.hpp>
#include <Common/CurrentMetrics.h>
@ -39,8 +39,6 @@
#include <Common/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>

View File

@ -1,11 +1,6 @@
#include <memory>
#include <optional>
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
#include "Storages/MergeTree/MergeTreeIndexGranularityConstant.h"
#include "Storages/MergeTree/MergeTreeSettings.h"
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Columns/ColumnSparse.h>
namespace DB

View File

@ -21,7 +21,6 @@
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/typeid_cast.h>
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
#include <Core/Settings.h>
#include <Core/ServerSettings.h>
#include <Storages/MergeTree/RangesInDataPart.h>
@ -84,6 +83,7 @@
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/algorithm/string/join.hpp>
@ -96,7 +96,6 @@
#include <atomic>
#include <chrono>
#include <limits>
#include <memory>
#include <optional>
#include <ranges>
#include <set>

View File

@ -3,7 +3,7 @@
#include <Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.h>
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
namespace DB

View File

@ -3,9 +3,10 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.h>
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <DataTypes/NestedUtils.h>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
namespace DB

View File

@ -273,7 +273,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
/// but not in case of vertical part of vertical merge)
if (compute_granularity)
{
size_t index_granularity_for_block = computeIndexGranularity(block_to_write);
size_t index_granularity_for_block;
if (auto constant_granularity = index_granularity->getConstantGranularity())
index_granularity_for_block = *constant_granularity;
else
index_granularity_for_block = computeIndexGranularity(block_to_write);
if (rows_written_in_last_mark > 0)
{
size_t rows_left_in_last_mark = index_granularity->getMarkRows(getCurrentMark()) - rows_written_in_last_mark;

View File

@ -1,11 +1,9 @@
#include <memory>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <IO/WriteHelpers.h>
#include "Common/Exception.h"
#include "Storages/MergeTree/MergeTreeDataPartType.h"
namespace DB
{

View File

@ -1,7 +1,6 @@
#pragma once
#include <optional>
#include <Storages/MergeTree/MarkRange.h>
#include "Storages/MergeTree/MergeTreeSettings.h"
namespace DB
{
@ -18,6 +17,8 @@ public:
MergeTreeIndexGranularity() = default;
virtual ~MergeTreeIndexGranularity() = default;
/// Returns the granularity value if the implementation uses a constant one
/// (MergeTreeIndexGranularityConstant returns its constant_granularity),
/// or an empty optional otherwise (MergeTreeIndexGranularityAdaptive).
virtual std::optional<size_t> getConstantGranularity() const = 0;
/// Return count of rows between marks
virtual size_t getRowsCountInRange(size_t begin, size_t end) const = 0;
/// Return count of rows between marks

View File

@ -1,9 +1,5 @@
#include <memory>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <IO/WriteHelpers.h>
#include "Common/Logger.h"
#include "Common/logger_useful.h"
#include "Storages/MergeTree/MergeTreeIndexGranularityConstant.h"
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
namespace DB

View File

@ -10,6 +10,7 @@ public:
MergeTreeIndexGranularityAdaptive() = default;
explicit MergeTreeIndexGranularityAdaptive(const std::vector<size_t> & marks_rows_partial_sums_);
/// This implementation never reports a constant granularity: the result is always empty.
std::optional<size_t> getConstantGranularity() const override
{
    return std::nullopt;
}
size_t getRowsCountInRange(size_t begin, size_t end) const override;
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;

View File

@ -1,6 +1,4 @@
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB

View File

@ -18,6 +18,7 @@ public:
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
MergeTreeIndexGranularityConstant(size_t constant_granularity_, size_t last_mark_granularity_, size_t num_marks_without_final_, bool has_final_mark_);
/// Always returns a non-empty optional holding the stored constant_granularity.
std::optional<size_t> getConstantGranularity() const override
{
    return std::optional<size_t>(constant_granularity);
}
size_t getRowsCountInRange(size_t begin, size_t end) const override;
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;

View File

@ -187,8 +187,8 @@ namespace ErrorCodes
DECLARE(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \
DECLARE(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \
DECLARE(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \
DECLARE(Bool, use_const_adaptive_granularity, false, "KEK KEK KEK KEK KEK KEK", 0) \
DECLARE(Bool, enable_index_granularity_compression, true, "KEK KEK KEK KEK KEK KEK", 0) \
DECLARE(Bool, use_const_adaptive_granularity, false, "Always use constant granularity for whole part. It allows to compress in memory values of index granularity. It can be useful in extremely large workloads with thin tables.", 0) \
DECLARE(Bool, enable_index_granularity_compression, true, "Compress in memory values of index granularity if it is possible", 0) \
DECLARE(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \
DECLARE(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \
DECLARE(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \

View File

@ -1,11 +1,9 @@
#include <optional>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <IO/HashingWriteBuffer.h>
#include <Interpreters/Context.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <Parsers/queryToString.h>
#include "Common/Logger.h"
#include <Common/logger_useful.h>
#include <Core/Settings.h>
@ -19,7 +17,7 @@ namespace ErrorCodes
namespace MergeTreeSetting
{
extern MergeTreeSettingsBool enable_index_granularity_compression;
extern const MergeTreeSettingsBool enable_index_granularity_compression;
}
MergedBlockOutputStream::MergedBlockOutputStream(

View File

@ -0,0 +1,54 @@
adaptive non-const, before merge
all_1_1_0 0 10 0
all_1_1_0 1 5 10
all_1_1_0 2 0 14
all_2_2_0 0 2 15
all_2_2_0 1 2 17
all_2_2_0 2 2 19
all_2_2_0 3 2 21
all_2_2_0 4 2 23
all_2_2_0 5 2 25
all_2_2_0 6 2 27
all_2_2_0 7 1 29
all_2_2_0 8 0 29
all_1_1_0 24
all_2_2_0 72
adaptive non-const, after merge
all_1_2_1 0 10 0
all_1_2_1 1 5 10
all_1_2_1 2 2 15
all_1_2_1 3 2 17
all_1_2_1 4 2 19
all_1_2_1 5 2 21
all_1_2_1 6 2 23
all_1_2_1 7 2 25
all_1_2_1 8 2 27
all_1_2_1 9 1 29
all_1_2_1 10 0 29
all_1_2_1 88
adaptive const, before merge
all_1_1_0 0 10 0
all_1_1_0 1 5 10
all_1_1_0 2 0 14
all_2_2_0 0 2 15
all_2_2_0 1 2 17
all_2_2_0 2 2 19
all_2_2_0 3 2 21
all_2_2_0 4 2 23
all_2_2_0 5 2 25
all_2_2_0 6 2 27
all_2_2_0 7 1 29
all_2_2_0 8 0 29
all_1_1_0 25
all_2_2_0 25
adaptive const, after merge
all_1_2_1 0 4 0
all_1_2_1 1 4 4
all_1_2_1 2 4 8
all_1_2_1 3 4 12
all_1_2_1 4 4 16
all_1_2_1 5 4 20
all_1_2_1 6 4 24
all_1_2_1 7 2 28
all_1_2_1 8 0 29
all_1_2_1 25

View File

@ -0,0 +1,53 @@
-- Scenario 1: adaptive granularity with use_const_adaptive_granularity = 0.
-- Dumps the marks and the in-memory size of the index granularity for each part,
-- before and after merging the two inserted parts.
DROP TABLE IF EXISTS t_index_granularity;
CREATE TABLE t_index_granularity (id UInt64, s String)
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 0,
index_granularity = 10,
index_granularity_bytes = 4096,
merge_max_block_size = 10,
merge_max_block_size_bytes = 4096,
enable_index_granularity_compression = 1,
use_const_adaptive_granularity = 0,
enable_vertical_merge_algorithm = 0;
-- Two separate inserts create two parts: one with short strings, one with 2048-byte strings.
INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);
SELECT 'adaptive non-const, before merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
-- ORDER BY name: two active parts exist at this point and the reference file expects them
-- in a fixed order, which is not guaranteed without an explicit ORDER BY.
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active ORDER BY name;
OPTIMIZE TABLE t_index_granularity FINAL;
SELECT 'adaptive non-const, after merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active ORDER BY name;
DROP TABLE t_index_granularity;
-- Scenario 2: same table, data and queries, but with use_const_adaptive_granularity = 1.
-- Dumps the marks and the in-memory size of the index granularity for each part,
-- before and after merging the two inserted parts.
CREATE TABLE t_index_granularity (id UInt64, s String)
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 0,
index_granularity = 10,
index_granularity_bytes = 4096,
merge_max_block_size = 10,
merge_max_block_size_bytes = 4096,
enable_index_granularity_compression = 1,
use_const_adaptive_granularity = 1,
enable_vertical_merge_algorithm = 0;
-- Two separate inserts create two parts: one with short strings, one with 2048-byte strings.
INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);
SELECT 'adaptive const, before merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
-- ORDER BY name: two active parts exist at this point and the reference file expects them
-- in a fixed order, which is not guaranteed without an explicit ORDER BY.
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active ORDER BY name;
OPTIMIZE TABLE t_index_granularity FINAL;
SELECT 'adaptive const, after merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active ORDER BY name;
DROP TABLE t_index_granularity;