mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Mixed parts by setting
This commit is contained in:
parent
dd2e4fd805
commit
fefce49c8f
@ -27,13 +27,13 @@ IMergedBlockOutputStream::IMergedBlockOutputStream(
|
||||
, min_compress_block_size(min_compress_block_size_)
|
||||
, max_compress_block_size(max_compress_block_size_)
|
||||
, aio_threshold(aio_threshold_)
|
||||
, marks_file_extension(storage.settings.index_granularity_bytes ? getAdaptiveMrkExtension() : getNonAdaptiveMrkExtension())
|
||||
, mark_size_in_bytes(storage.settings.index_granularity_bytes ? getAdaptiveMrkSize() : getNonAdaptiveMrkSize())
|
||||
, marks_file_extension(storage.canUseAdaptiveGranularity() ? getAdaptiveMrkExtension() : getNonAdaptiveMrkExtension())
|
||||
, mark_size_in_bytes(storage.canUseAdaptiveGranularity() ? getAdaptiveMrkSize() : getNonAdaptiveMrkSize())
|
||||
, blocks_are_granules_size(blocks_are_granules_size_)
|
||||
, index_granularity(index_granularity_)
|
||||
, compute_granularity(index_granularity.empty())
|
||||
, codec(std::move(codec_))
|
||||
, with_final_mark(storage.settings.write_final_mark && storage.settings.index_granularity_bytes)
|
||||
, with_final_mark(storage.settings.write_final_mark && storage.canUseAdaptiveGranularity())
|
||||
{
|
||||
if (blocks_are_granules_size && !index_granularity.empty())
|
||||
throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR);
|
||||
|
@ -743,6 +743,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
auto lock = lockParts();
|
||||
data_parts_indexes.clear();
|
||||
|
||||
bool has_adaptive_parts = false, has_non_adaptive_parts = false;
|
||||
for (const String & file_name : part_file_names)
|
||||
{
|
||||
MergeTreePartInfo part_info;
|
||||
@ -827,6 +828,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
|
||||
continue;
|
||||
}
|
||||
if (!part->index_granularity_info.is_adaptive)
|
||||
has_non_adaptive_parts = true;
|
||||
else
|
||||
has_adaptive_parts = true;
|
||||
|
||||
part->modification_time = Poco::File(full_path + file_name).getLastModified().epochTime();
|
||||
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
|
||||
@ -836,6 +841,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART);
|
||||
}
|
||||
|
||||
if (has_non_adaptive_parts && has_adaptive_parts && !settings.enable_mixed_granularity_parts)
|
||||
throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
has_non_adaptive_index_granularity_parts = has_non_adaptive_parts;
|
||||
|
||||
if (suspicious_broken_parts > settings.max_suspicious_broken_parts && !skip_sanity_checks)
|
||||
throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.",
|
||||
ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS);
|
||||
|
@ -574,6 +574,12 @@ public:
|
||||
|
||||
virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
|
||||
|
||||
bool canUseAdaptiveGranularity() const
|
||||
{
|
||||
return settings.index_granularity_bytes != 0 &&
|
||||
(settings.enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts);
|
||||
}
|
||||
|
||||
MergeTreeDataFormatVersion format_version;
|
||||
|
||||
Context global_context;
|
||||
@ -631,6 +637,8 @@ public:
|
||||
/// For generating names of temporary parts during insertion.
|
||||
SimpleIncrement insert_increment;
|
||||
|
||||
bool has_non_adaptive_index_granularity_parts = false;
|
||||
|
||||
protected:
|
||||
friend struct MergeTreeDataPart;
|
||||
friend class MergeTreeDataMergerMutator;
|
||||
|
@ -138,7 +138,7 @@ MergeTreeDataPart::MergeTreeDataPart(MergeTreeData & storage_, const String & na
|
||||
: storage(storage_)
|
||||
, name(name_)
|
||||
, info(MergeTreePartInfo::fromPartName(name_, storage.format_version))
|
||||
, index_granularity_info(storage.settings)
|
||||
, index_granularity_info(storage)
|
||||
{
|
||||
}
|
||||
|
||||
@ -146,7 +146,7 @@ MergeTreeDataPart::MergeTreeDataPart(const MergeTreeData & storage_, const Strin
|
||||
: storage(storage_)
|
||||
, name(name_)
|
||||
, info(info_)
|
||||
, index_granularity_info(storage.settings)
|
||||
, index_granularity_info(storage)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -627,13 +627,9 @@ size_t roundRowsOrBytesToMarks(
|
||||
size_t bytes_granularity)
|
||||
{
|
||||
if (bytes_granularity == 0)
|
||||
{
|
||||
return (rows_setting + rows_granularity - 1) / rows_granularity;
|
||||
}
|
||||
return (rows_setting + rows_granularity - 1) / rows_granularity;
|
||||
else
|
||||
{
|
||||
return (bytes_setting + bytes_granularity - 1) / bytes_granularity;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Poco/Path.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/DirectoryIterator.h>
|
||||
@ -22,14 +23,14 @@ std::optional<std::string> MergeTreeIndexGranularityInfo::getMrkExtensionFromFS(
|
||||
}
|
||||
|
||||
MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(
|
||||
const MergeTreeSettings & storage_settings)
|
||||
const MergeTreeData & storage)
|
||||
{
|
||||
fixed_index_granularity = storage_settings.index_granularity;
|
||||
fixed_index_granularity = storage.settings.index_granularity;
|
||||
/// Granularity is fixed
|
||||
if (storage_settings.index_granularity_bytes == 0)
|
||||
if (!storage.canUseAdaptiveGranularity())
|
||||
setNonAdaptive();
|
||||
else
|
||||
setAdaptive(storage_settings.index_granularity_bytes);
|
||||
setAdaptive(storage.settings.index_granularity_bytes);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
|
||||
#include <optional>
|
||||
#include <Core/Types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class MergeTreeData;
|
||||
/// Meta information about index granularity
|
||||
struct MergeTreeIndexGranularityInfo
|
||||
{
|
||||
@ -27,7 +27,7 @@ public:
|
||||
size_t index_granularity_bytes;
|
||||
|
||||
MergeTreeIndexGranularityInfo(
|
||||
const MergeTreeSettings & storage_settings);
|
||||
const MergeTreeData & storage);
|
||||
|
||||
void changeGranularityIfRequired(const std::string & path_to_part);
|
||||
|
||||
|
BIN
dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h.gch
Normal file
BIN
dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h.gch
Normal file
Binary file not shown.
@ -79,7 +79,8 @@ struct MergeTreeSettings : public SettingsCollection<MergeTreeSettings>
|
||||
M(SettingUInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).") \
|
||||
M(SettingUInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).") \
|
||||
M(SettingInt64, merge_with_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with TTL can be repeated.") \
|
||||
M(SettingBool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)")
|
||||
M(SettingBool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)") \
|
||||
M(SettingBool, enable_mixed_granularity_parts, 0, "Enable parts with adaptive and non adaptive granularity")
|
||||
|
||||
DECLARE_SETTINGS_COLLECTION(LIST_OF_MERGE_TREE_SETTINGS)
|
||||
|
||||
|
@ -29,7 +29,7 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream(
|
||||
/// round min_marks_to_read up to nearest multiple of block_size expressed in marks
|
||||
/// If granularity is adaptive it doesn't make sense
|
||||
/// Maybe it will make sence to add settings `max_block_size_bytes`
|
||||
if (max_block_size_rows && storage.settings.index_granularity_bytes == 0)
|
||||
if (max_block_size_rows && !storage.canUseAdaptiveGranularity())
|
||||
{
|
||||
size_t fixed_index_granularity = storage.settings.index_granularity;
|
||||
min_marks_to_read = (min_marks_to_read_ * fixed_index_granularity + max_block_size_rows - 1)
|
||||
|
@ -127,12 +127,12 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
|
||||
it->type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[i]);
|
||||
}
|
||||
|
||||
if (with_final_mark)
|
||||
if (with_final_mark && rows_count != 0)
|
||||
writeFinalMark(it->name, it->type, offset_columns, false, serialize_settings.path);
|
||||
}
|
||||
}
|
||||
|
||||
if (with_final_mark)
|
||||
if (with_final_mark && rows_count != 0)
|
||||
index_granularity.appendMark(0); /// last mark
|
||||
|
||||
/// Finish skip index serialization
|
||||
@ -155,7 +155,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
|
||||
|
||||
if (index_stream)
|
||||
{
|
||||
if (with_final_mark)
|
||||
if (with_final_mark && rows_count != 0)
|
||||
{
|
||||
for (size_t j = 0; j < index_columns.size(); ++j)
|
||||
{
|
||||
@ -415,7 +415,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
|
||||
writeIntBinary(stream.compressed.offset(), stream.marks);
|
||||
/// Actually this numbers is redundant, but we have to store them
|
||||
/// to be compatible with normal .mrk2 file format
|
||||
if (storage.settings.index_granularity_bytes != 0)
|
||||
if (storage.canUseAdaptiveGranularity())
|
||||
writeIntBinary(1UL, stream.marks);
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CURDIR/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree(p, p, 1)"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90"
|
||||
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes"
|
||||
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes"
|
||||
@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
|
||||
# PREWHERE using empty column
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree(p, p, 100)"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000"
|
||||
$CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0"
|
||||
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0"
|
||||
@ -28,7 +28,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs"
|
||||
# Nullable PREWHERE
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree(p, p, 8)"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
|
||||
$CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)"
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
|
||||
|
@ -22,12 +22,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS unsigned_integer_test_table;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS enum_test_table;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS date_test_table;"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE string_test_table (val String) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE fixed_string_test_table (val FixedString(1)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE signed_integer_test_table (val Int32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE string_test_table (val String) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE fixed_string_test_table (val FixedString(1)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE signed_integer_test_table (val Int32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;"
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO string_test_table VALUES ('0'), ('2'), ('2');"
|
||||
|
Loading…
Reference in New Issue
Block a user