mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
add test for const adaptive granularity
This commit is contained in:
parent
aa51f4cc88
commit
c5817d528c
@ -3,7 +3,6 @@
|
||||
#include <base/types.h>
|
||||
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
@ -30,6 +29,7 @@
|
||||
#include <Storages/MergeTree/checkDataPart.h>
|
||||
#include <Storages/MergeTree/Backup.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
#include <base/JSON.h>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
@ -39,8 +39,6 @@
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
|
||||
|
||||
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
|
||||
|
||||
|
@ -1,11 +1,6 @@
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
|
||||
#include <Common/MemoryTrackerBlockerInThread.h>
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularityConstant.h"
|
||||
#include "Storages/MergeTree/MergeTreeSettings.h"
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
|
||||
namespace DB
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h"
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Storages/MergeTree/RangesInDataPart.h>
|
||||
@ -84,6 +83,7 @@
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
|
||||
#include <boost/range/algorithm_ext/erase.hpp>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
@ -96,7 +96,6 @@
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ranges>
|
||||
#include <set>
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
|
||||
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -3,9 +3,10 @@
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
|
||||
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -273,7 +273,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
|
||||
/// but not in case of vertical part of vertical merge)
|
||||
if (compute_granularity)
|
||||
{
|
||||
size_t index_granularity_for_block = computeIndexGranularity(block_to_write);
|
||||
size_t index_granularity_for_block;
|
||||
if (auto constant_granularity = index_granularity->getConstantGranularity())
|
||||
index_granularity_for_block = *constant_granularity;
|
||||
else
|
||||
index_granularity_for_block = computeIndexGranularity(block_to_write);
|
||||
|
||||
if (rows_written_in_last_mark > 0)
|
||||
{
|
||||
size_t rows_left_in_last_mark = index_granularity->getMarkRows(getCurrentMark()) - rows_written_in_last_mark;
|
||||
|
@ -1,11 +1,9 @@
|
||||
#include <memory>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "Common/Exception.h"
|
||||
#include "Storages/MergeTree/MergeTreeDataPartType.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
#include <optional>
|
||||
#include <Storages/MergeTree/MarkRange.h>
|
||||
#include "Storages/MergeTree/MergeTreeSettings.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -18,6 +17,8 @@ public:
|
||||
MergeTreeIndexGranularity() = default;
|
||||
virtual ~MergeTreeIndexGranularity() = default;
|
||||
|
||||
virtual std::optional<size_t> getConstantGranularity() const = 0;
|
||||
|
||||
/// Return count of rows between marks
|
||||
virtual size_t getRowsCountInRange(size_t begin, size_t end) const = 0;
|
||||
/// Return count of rows between marks
|
||||
|
@ -1,9 +1,5 @@
|
||||
#include <memory>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "Common/Logger.h"
|
||||
#include "Common/logger_useful.h"
|
||||
#include "Storages/MergeTree/MergeTreeIndexGranularityConstant.h"
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -10,6 +10,7 @@ public:
|
||||
MergeTreeIndexGranularityAdaptive() = default;
|
||||
explicit MergeTreeIndexGranularityAdaptive(const std::vector<size_t> & marks_rows_partial_sums_);
|
||||
|
||||
std::optional<size_t> getConstantGranularity() const override { return {}; }
|
||||
size_t getRowsCountInRange(size_t begin, size_t end) const override;
|
||||
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
|
||||
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;
|
||||
|
@ -1,6 +1,4 @@
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -18,6 +18,7 @@ public:
|
||||
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
|
||||
MergeTreeIndexGranularityConstant(size_t constant_granularity_, size_t last_mark_granularity_, size_t num_marks_without_final_, bool has_final_mark_);
|
||||
|
||||
std::optional<size_t> getConstantGranularity() const override { return constant_granularity; }
|
||||
size_t getRowsCountInRange(size_t begin, size_t end) const override;
|
||||
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
|
||||
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;
|
||||
|
@ -187,8 +187,8 @@ namespace ErrorCodes
|
||||
DECLARE(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \
|
||||
DECLARE(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \
|
||||
DECLARE(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \
|
||||
DECLARE(Bool, use_const_adaptive_granularity, false, "KEK KEK KEK KEK KEK KEK", 0) \
|
||||
DECLARE(Bool, enable_index_granularity_compression, true, "KEK KEK KEK KEK KEK KEK", 0) \
|
||||
DECLARE(Bool, use_const_adaptive_granularity, false, "Always use constant granularity for whole part. It allows to compress in memory values of index granularity. It can be useful in extremely large workloads with thin tables.", 0) \
|
||||
DECLARE(Bool, enable_index_granularity_compression, true, "Compress in memory values of index granularity if it is possible", 0) \
|
||||
DECLARE(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \
|
||||
DECLARE(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \
|
||||
DECLARE(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \
|
||||
|
@ -1,11 +1,9 @@
|
||||
#include <optional>
|
||||
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/MergeTreeTransaction.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include "Common/Logger.h"
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
|
||||
@ -19,7 +17,7 @@ namespace ErrorCodes
|
||||
|
||||
namespace MergeTreeSetting
|
||||
{
|
||||
extern MergeTreeSettingsBool enable_index_granularity_compression;
|
||||
extern const MergeTreeSettingsBool enable_index_granularity_compression;
|
||||
}
|
||||
|
||||
MergedBlockOutputStream::MergedBlockOutputStream(
|
||||
|
@ -0,0 +1,54 @@
|
||||
adaptive non-const, before merge
|
||||
all_1_1_0 0 10 0
|
||||
all_1_1_0 1 5 10
|
||||
all_1_1_0 2 0 14
|
||||
all_2_2_0 0 2 15
|
||||
all_2_2_0 1 2 17
|
||||
all_2_2_0 2 2 19
|
||||
all_2_2_0 3 2 21
|
||||
all_2_2_0 4 2 23
|
||||
all_2_2_0 5 2 25
|
||||
all_2_2_0 6 2 27
|
||||
all_2_2_0 7 1 29
|
||||
all_2_2_0 8 0 29
|
||||
all_1_1_0 24
|
||||
all_2_2_0 72
|
||||
adaptive non-const, after merge
|
||||
all_1_2_1 0 10 0
|
||||
all_1_2_1 1 5 10
|
||||
all_1_2_1 2 2 15
|
||||
all_1_2_1 3 2 17
|
||||
all_1_2_1 4 2 19
|
||||
all_1_2_1 5 2 21
|
||||
all_1_2_1 6 2 23
|
||||
all_1_2_1 7 2 25
|
||||
all_1_2_1 8 2 27
|
||||
all_1_2_1 9 1 29
|
||||
all_1_2_1 10 0 29
|
||||
all_1_2_1 88
|
||||
adaptive const, before merge
|
||||
all_1_1_0 0 10 0
|
||||
all_1_1_0 1 5 10
|
||||
all_1_1_0 2 0 14
|
||||
all_2_2_0 0 2 15
|
||||
all_2_2_0 1 2 17
|
||||
all_2_2_0 2 2 19
|
||||
all_2_2_0 3 2 21
|
||||
all_2_2_0 4 2 23
|
||||
all_2_2_0 5 2 25
|
||||
all_2_2_0 6 2 27
|
||||
all_2_2_0 7 1 29
|
||||
all_2_2_0 8 0 29
|
||||
all_1_1_0 25
|
||||
all_2_2_0 25
|
||||
adaptive const, after merge
|
||||
all_1_2_1 0 4 0
|
||||
all_1_2_1 1 4 4
|
||||
all_1_2_1 2 4 8
|
||||
all_1_2_1 3 4 12
|
||||
all_1_2_1 4 4 16
|
||||
all_1_2_1 5 4 20
|
||||
all_1_2_1 6 4 24
|
||||
all_1_2_1 7 2 28
|
||||
all_1_2_1 8 0 29
|
||||
all_1_2_1 25
|
@ -0,0 +1,53 @@
|
||||
DROP TABLE IF EXISTS t_index_granularity;
|
||||
|
||||
CREATE TABLE t_index_granularity (id UInt64, s String)
|
||||
ENGINE = MergeTree ORDER BY id
|
||||
SETTINGS min_bytes_for_wide_part = 0,
|
||||
index_granularity = 10,
|
||||
index_granularity_bytes = 4096,
|
||||
merge_max_block_size = 10,
|
||||
merge_max_block_size_bytes = 4096,
|
||||
enable_index_granularity_compression = 1,
|
||||
use_const_adaptive_granularity = 0,
|
||||
enable_vertical_merge_algorithm = 0;
|
||||
|
||||
INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
|
||||
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);
|
||||
|
||||
SELECT 'adaptive non-const, before merge';
|
||||
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
|
||||
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;
|
||||
|
||||
OPTIMIZE TABLE t_index_granularity FINAL;
|
||||
|
||||
SELECT 'adaptive non-const, after merge';
|
||||
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
|
||||
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;
|
||||
|
||||
DROP TABLE t_index_granularity;
|
||||
|
||||
CREATE TABLE t_index_granularity (id UInt64, s String)
|
||||
ENGINE = MergeTree ORDER BY id
|
||||
SETTINGS min_bytes_for_wide_part = 0,
|
||||
index_granularity = 10,
|
||||
index_granularity_bytes = 4096,
|
||||
merge_max_block_size = 10,
|
||||
merge_max_block_size_bytes = 4096,
|
||||
enable_index_granularity_compression = 1,
|
||||
use_const_adaptive_granularity = 1,
|
||||
enable_vertical_merge_algorithm = 0;
|
||||
|
||||
INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
|
||||
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);
|
||||
|
||||
SELECT 'adaptive const, before merge';
|
||||
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
|
||||
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;
|
||||
|
||||
OPTIMIZE TABLE t_index_granularity FINAL;
|
||||
|
||||
SELECT 'adaptive const, after merge';
|
||||
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
|
||||
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;
|
||||
|
||||
DROP TABLE t_index_granularity;
|
Loading…
Reference in New Issue
Block a user