From 7a37c4f6a99b85f32b9c8028c6538b9d2bf087bf Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 9 Oct 2019 15:02:05 +0200 Subject: [PATCH 01/51] Parser for extended TTL expressions. --- dbms/src/Parsers/ASTAlterQuery.cpp | 6 ++- dbms/src/Parsers/ASTAlterQuery.h | 9 +--- dbms/src/Parsers/ASTTTLElement.cpp | 26 ++++++++++++ dbms/src/Parsers/ASTTTLElement.h | 42 +++++++++++++++++++ dbms/src/Parsers/ExpressionElementParsers.cpp | 37 ++++++++++++++++ dbms/src/Parsers/ExpressionElementParsers.h | 10 +++++ dbms/src/Parsers/ExpressionListParsers.cpp | 7 ++++ dbms/src/Parsers/ExpressionListParsers.h | 9 ++++ dbms/src/Parsers/ParserAlterQuery.cpp | 16 ++++--- dbms/src/Parsers/ParserCreateQuery.cpp | 3 +- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- dbms/src/Storages/PartitionCommands.cpp | 6 ++- 12 files changed, 155 insertions(+), 18 deletions(-) create mode 100644 dbms/src/Parsers/ASTTTLElement.cpp create mode 100644 dbms/src/Parsers/ASTTTLElement.h diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 93f21ae5c5e..7a112ddc941 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -176,12 +176,14 @@ void ASTAlterCommand::formatImpl( settings.ostr << " TO "; switch (move_destination_type) { - case MoveDestinationType::DISK: + case ASTTTLElement::DestinationType::DISK: settings.ostr << "DISK "; break; - case MoveDestinationType::VOLUME: + case ASTTTLElement::DestinationType::VOLUME: settings.ostr << "VOLUME "; break; + default: + break; } settings.ostr << quoteString(move_destination_name); } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 162b9518824..3699f3aee95 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -128,13 +129,7 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN - enum MoveDestinationType - { - DISK, - VOLUME, - }; - - MoveDestinationType move_destination_type; + ASTTTLElement::DestinationType move_destination_type; String move_destination_name; diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp new file mode 100644 index 00000000000..d15278a8d44 --- /dev/null +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -0,0 +1,26 @@ +#include +#include + + +namespace DB +{ + +void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + children.front()->formatImpl(settings, state, frame); + if (destination_type == DestinationType::DISK) { + settings.ostr << " TO DISK "; + } else if (destination_type == DestinationType::VOLUME) { + settings.ostr << " TO VOLUME "; + } else if (destination_type == DestinationType::DELETE) { + settings.ostr << " DELETE"; + } + + if (destination_type == DestinationType::DISK || destination_type == DestinationType::VOLUME) { + WriteBufferFromOwnString destination_name_buf; + writeQuoted(destination_name, destination_name_buf); + settings.ostr << destination_name_buf.str(); + } +} + +} diff --git a/dbms/src/Parsers/ASTTTLElement.h b/dbms/src/Parsers/ASTTTLElement.h new file mode 100644 index 00000000000..b0b0991c6fb --- /dev/null +++ b/dbms/src/Parsers/ASTTTLElement.h @@ -0,0 +1,42 @@ +#pragma once + +#include + + +namespace DB +{ +/** Element of TTL expression. 
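+ * After the expression one of TO DISK 'disk_name', TO VOLUME 'volume_name' or
+ * DELETE (the default) may follow; see the DestinationType enum below.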
+ */ +class ASTTTLElement : public IAST +{ +public: + enum DestinationType + { + DISK, + VOLUME, + DELETE, + }; + + DestinationType destination_type; + String destination_name; + + ASTTTLElement(DestinationType destination_type_, const String & destination_name_) + : destination_type(destination_type_) + , destination_name(destination_name_) + { + } + + String getID(char) const override { return "TTLElement"; } + + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return clone; + } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 1f1ba4edee7..865b39a9c1b 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1414,6 +1415,42 @@ bool ParserFunctionWithKeyValueArguments::parseImpl(Pos & pos, ASTPtr & node, Ex return true; } +bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_to_disk("TO DISK"); + ParserKeyword s_to_volume("TO VOLUME"); + ParserKeyword s_delete("DELETE"); + ParserStringLiteral parser_string_literal; + ParserExpression parser_exp; + + ASTPtr expr_elem; + if (!parser_exp.parse(pos, expr_elem, expected)) + return false; + + ASTTTLElement::DestinationType destination_type = ASTTTLElement::DestinationType::DELETE; + String destination_name; + if (s_to_disk.ignore(pos)) { + destination_type = ASTTTLElement::DestinationType::DISK; + } else if (s_to_volume.ignore(pos)) { + destination_type = ASTTTLElement::DestinationType::VOLUME; + } else { + s_delete.ignore(pos); + } + + if (destination_type == ASTTTLElement::DestinationType::DISK || destination_type == ASTTTLElement::DestinationType::VOLUME) { + ASTPtr ast_space_name; + if (!parser_string_literal.parse(pos, ast_space_name, expected)) + return false; + + destination_name = ast_space_name->as().value.get(); + } + + node = std::make_shared(destination_type, destination_name); + node->children.push_back(expr_elem); + + return true; +} + bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier non_parametric; diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index 63ed1348b13..fbcaeeb3d45 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -320,4 +320,14 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); }; +/** Element of TTL expression - same as expression element, but in addition, + * TO DISK 'xxx' | TO VOLUME 'xxx' | DELETE could be specified + */ +class ParserTTLElement : public IParserBase +{ +protected: + const char * getName() const { return "element of TTL expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + } diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index 060d1e89f02..6029fc91566 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -557,6 +557,13 @@ bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & } +bool ParserTTLExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return 
ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, node, expected); +} + + bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr node_comp; diff --git a/dbms/src/Parsers/ExpressionListParsers.h b/dbms/src/Parsers/ExpressionListParsers.h index 4fbee507f2d..ed37807eb67 100644 --- a/dbms/src/Parsers/ExpressionListParsers.h +++ b/dbms/src/Parsers/ExpressionListParsers.h @@ -386,6 +386,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; + /// Parser for list of key-value pairs. class ParserKeyValuePairsList : public IParserBase { @@ -394,4 +395,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; + +class ParserTTLExpressionList : public IParserBase +{ +protected: + const char * getName() const { return "ttl expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + } diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 66f18c4367d..9cc53c7612f 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -87,6 +87,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected /* allow_empty = */ false); ParserSetQuery parser_settings(true); ParserNameList values_p; + ParserTTLExpressionList parser_ttl_list; if (is_live_view) { @@ -236,12 +237,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; if (s_to_disk.ignore(pos)) - command->move_destination_type = ASTAlterCommand::MoveDestinationType::DISK; + command->move_destination_type = ASTTTLElement::DestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = ASTAlterCommand::MoveDestinationType::VOLUME; + command->move_destination_type = ASTTTLElement::DestinationType::VOLUME; else return false; + // FIXME See ParserTTLElement + ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; @@ -256,12 +259,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MOVE_PARTITION; if (s_to_disk.ignore(pos)) - command->move_destination_type = ASTAlterCommand::MoveDestinationType::DISK; + command->move_destination_type = ASTTTLElement::DestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = ASTAlterCommand::MoveDestinationType::VOLUME; + command->move_destination_type = ASTTTLElement::DestinationType::VOLUME; else return false; + // FIXME See ParserTTLElement + ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; @@ -431,7 +436,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_modify_ttl.ignore(pos, expected)) { - if (!parser_exp_elem.parse(pos, command->ttl, expected)) + if (!parser_ttl_list.parse(pos, command->ttl, expected)) +// FIXME check if that is fine, can be `toDate(), toDate() TO DISK 'abc'` and that is not tuple TO DISK 'abc' return false; command->type = ASTAlterCommand::MODIFY_TTL; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index a014b861e77..79f517919a9 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -250,6 +250,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifierWithOptionalParameters 
ident_with_optional_params_p; ParserExpression expression_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); + ParserTTLExpressionList parser_ttl_list; ASTPtr engine; ASTPtr partition_by; @@ -303,7 +304,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ttl_table && s_ttl.ignore(pos, expected)) { - if (expression_p.parse(pos, ttl_table, expected)) + if (parser_ttl_list.parse(pos, ttl_table, expected)) continue; else return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 27f538afc26..92249e7f89d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -565,7 +565,7 @@ public: /// All MergeTreeData children have settings. void checkSettingCanBeChanged(const String & setting_name) const override; - /// Remove columns, that have been markedd as empty after zeroing values with expired ttl + /// Remove columns, that have been marked as empty after zeroing values with expired ttl void removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part); /// Freezes all parts. diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index 6a60037f43b..62d8b4a488f 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -47,12 +47,14 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.part = command_ast->part; switch (command_ast->move_destination_type) { - case ASTAlterCommand::MoveDestinationType::DISK: + case ASTTTLElement::DestinationType::DISK: res.move_destination_type = PartitionCommand::MoveDestinationType::DISK; break; - case ASTAlterCommand::MoveDestinationType::VOLUME: + case ASTTTLElement::DestinationType::VOLUME: res.move_destination_type = PartitionCommand::MoveDestinationType::VOLUME; break; + default: + break; } res.move_destination_name = command_ast->move_destination_name; return res; From 3e984609fb34d8033119fa895724e68df1ae1281 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 16 Oct 2019 10:32:37 +0300 Subject: [PATCH 02/51] Fixed ClickHouse after changing syntax. 
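The table-level TTL clause is now parsed as a list of TTL elements:
setTTLExpressions() walks that list, takes the single DELETE element as the
table TTL, and rejects duplicates with BAD_TTL_EXPRESSION. A sketch of the two
equivalent spellings this accepts (the table schema is illustrative):

    CREATE TABLE t (d DateTime, x UInt32) ENGINE = MergeTree ORDER BY x
        TTL d + INTERVAL 1 DAY;          -- implicit DELETE
    --  TTL d + INTERVAL 1 DAY DELETE    -- equivalent explicit form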
--- dbms/src/Parsers/ASTTTLElement.cpp | 22 ++++++++--------- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 24 +++++++++++++++++-- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp index d15278a8d44..ec8e6ca5c44 100644 --- a/dbms/src/Parsers/ASTTTLElement.cpp +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -8,19 +9,18 @@ namespace DB void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { children.front()->formatImpl(settings, state, frame); - if (destination_type == DestinationType::DISK) { - settings.ostr << " TO DISK "; - } else if (destination_type == DestinationType::VOLUME) { - settings.ostr << " TO VOLUME "; - } else if (destination_type == DestinationType::DELETE) { + if (destination_type == DestinationType::DISK) + { + settings.ostr << " TO DISK " << quoteString(destination_name); + } + else if (destination_type == DestinationType::VOLUME) + { + settings.ostr << " TO VOLUME " << quoteString(destination_name); + } + else if (destination_type == DestinationType::DELETE) + { settings.ostr << " DELETE"; } - - if (destination_type == DestinationType::DISK || destination_type == DestinationType::VOLUME) { - WriteBufferFromOwnString destination_name_buf; - writeQuoted(destination_name, destination_name_buf); - settings.ostr << destination_name_buf.str(); - } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 442f46ea552..0abbbc05812 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -604,12 +604,32 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new if (new_ttl_table_ast) { - auto new_ttl_table_entry = create_ttl_entry(new_ttl_table_ast); + ASTPtr new_delete_ttl_table_ast; + for (auto ttl_element_ptr : new_ttl_table_ast->children) + { + ASTTTLElement & ttl_element = static_cast(*ttl_element_ptr); + if (ttl_element.destination_type == ASTTTLElement::DELETE) + { + if (new_delete_ttl_table_ast) + { + throw Exception("Too many DELETE ttls.", ErrorCodes::BAD_TTL_EXPRESSION); + } + new_delete_ttl_table_ast = ttl_element.children[0]; + } + else + { + // FIXME: Read MOVE ttls. + } + } + + auto new_ttl_table_entry = create_ttl_entry(new_delete_ttl_table_ast); if (!only_check) { - ttl_table_ast = new_ttl_table_ast; + ttl_table_ast = new_delete_ttl_table_ast; ttl_table_entry = new_ttl_table_entry; } + + // FIXME: Apply MOVE ttls. } } From 5a12986159ef7699c4b9a8bfe74b90638aa06cf2 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 16 Oct 2019 13:42:10 +0300 Subject: [PATCH 03/51] Updated ttl test according to new TTL syntax. 
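The replicated TTL test now runs with both the implicit and the explicit
DELETE form of the table TTL, and adds a case asserting that a table TTL with
two delete rules fails with a QueryRuntimeException, e.g. (taken from the test
below):

    TTL date + INTERVAL 1 DAY, date + INTERVAL 2 DAY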
--- .../integration/test_ttl_replicated/test.py | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/dbms/tests/integration/test_ttl_replicated/test.py b/dbms/tests/integration/test_ttl_replicated/test.py index f028b8fdbe5..ae4fa8404ac 100644 --- a/dbms/tests/integration/test_ttl_replicated/test.py +++ b/dbms/tests/integration/test_ttl_replicated/test.py @@ -1,6 +1,7 @@ import time import pytest +import helpers.client as client from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -9,7 +10,7 @@ node1 = cluster.add_instance('node1', with_zookeeper=True) node2 = cluster.add_instance('node2', with_zookeeper=True) @pytest.fixture(scope="module") -def start_cluster(): +def started_cluster(): try: cluster.start() @@ -25,7 +26,7 @@ def drop_table(nodes, table_name): for node in nodes: node.query("DROP TABLE IF EXISTS {}".format(table_name)) -def test_ttl_columns(start_cluster): +def test_ttl_columns(started_cluster): drop_table([node1, node2], "test_ttl") for node in [node1, node2]: node.query( @@ -43,8 +44,12 @@ def test_ttl_columns(start_cluster): expected = "1\t0\t0\n2\t0\t0\n" assert TSV(node1.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected) assert TSV(node2.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected) - -def test_ttl_table(start_cluster): + +@pytest.mark.parametrize("delete_suffix", [ + "", + "DELETE", +]) +def test_ttl_table(started_cluster, delete_suffix): drop_table([node1, node2], "test_ttl") for node in [node1, node2]: node.query( @@ -52,8 +57,8 @@ def test_ttl_table(start_cluster): CREATE TABLE test_ttl(date DateTime, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) - TTL date + INTERVAL 1 DAY SETTINGS merge_with_ttl_timeout=0; - '''.format(replica=node.name)) + TTL date + INTERVAL 1 DAY {delete_suffix} SETTINGS merge_with_ttl_timeout=0; + '''.format(replica=node.name, delete_suffix=delete_suffix)) node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-10 00:00:00'), 1)") node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-11 10:00:00'), 2)") @@ -62,4 +67,18 @@ def test_ttl_table(start_cluster): assert TSV(node1.query("SELECT * FROM test_ttl")) == TSV("") assert TSV(node2.query("SELECT * FROM test_ttl")) == TSV("") - + +def test_ttl_double_delete_rule_returns_error(started_cluster): + drop_table([node1, node2], "test_ttl") + try: + node1.query(''' + CREATE TABLE test_ttl(date DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') + ORDER BY id PARTITION BY toDayOfMonth(date) + TTL date + INTERVAL 1 DAY, date + INTERVAL 2 DAY SETTINGS merge_with_ttl_timeout=0; + '''.format(replica=node1.name)) + assert False + except client.QueryRuntimeException: + pass + except: + assert False From 33ded274a3419fb2cc786f8d4cd347ddee5bae29 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 17 Oct 2019 19:01:28 +0300 Subject: [PATCH 04/51] First attempt to store min/max for move ttl expressions. 
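Min/max timestamps are now computed per move-TTL expression at write time and
serialized into the part's ttl.txt next to the existing "columns" and "table"
sections. A hypothetical sample of the resulting JSON (the expression string
and the timestamps are illustrative):

    {"table":{"min":1571295600,"max":1571382000},
     "moves":[{"expression":"plus(date, toIntervalDay(1))","min":1571295600,"max":1571382000}]}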
--- dbms/src/Common/quoteString.cpp | 9 ++++ dbms/src/Common/quoteString.h | 3 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 16 +++++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 46 +++++++++++++++++-- .../MergeTree/MergeTreeDataPartTTLInfo.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 13 +++--- 7 files changed, 78 insertions(+), 13 deletions(-) diff --git a/dbms/src/Common/quoteString.cpp b/dbms/src/Common/quoteString.cpp index bcc6906ddfa..6fc928ff022 100644 --- a/dbms/src/Common/quoteString.cpp +++ b/dbms/src/Common/quoteString.cpp @@ -14,6 +14,15 @@ String quoteString(const StringRef & x) } +String doubleQuoteString(const StringRef & x) +{ + String res(x.size, '\0'); + WriteBufferFromString wb(res); + writeDoubleQuotedString(x, wb); + return res; +} + + String backQuote(const StringRef & x) { String res(x.size, '\0'); diff --git a/dbms/src/Common/quoteString.h b/dbms/src/Common/quoteString.h index f17f6c7015d..426034e4803 100644 --- a/dbms/src/Common/quoteString.h +++ b/dbms/src/Common/quoteString.h @@ -9,6 +9,9 @@ namespace DB /// Quote the string. String quoteString(const StringRef & x); +/// Double quote the string. +String doubleQuoteString(const StringRef & x); + /// Quote the identifier with backquotes. String backQuote(const StringRef & x); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 0abbbc05812..90720d428ed 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -612,13 +612,21 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new { if (new_delete_ttl_table_ast) { - throw Exception("Too many DELETE ttls.", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception("Too many DELETE ttls", ErrorCodes::BAD_TTL_EXPRESSION); } new_delete_ttl_table_ast = ttl_element.children[0]; } else { - // FIXME: Read MOVE ttls. + auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); + if (!only_check) + { + std::ostringstream expression_text_stream; + IAST::FormatSettings settings(expression_text_stream, true); + ttl_element.children[0]->format(settings); + move_ttl_entries_by_name.emplace(expression_text_stream.str(), new_ttl_entry); + /// FIXME: Save TTLElement type and destination somehow. + } } } @@ -628,9 +636,9 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new ttl_table_ast = new_delete_ttl_table_ast; ttl_table_entry = new_ttl_table_entry; } - - // FIXME: Apply MOVE ttls. } + + // FIXME: In case of ALTER one need to clean up previous values to actually set expression but not merge them. 
} diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 92249e7f89d..734b52209da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -726,6 +726,8 @@ public: TTLEntry ttl_table_entry; + TTLEntriesByName move_ttl_entries_by_name; + String sampling_expr_column_name; Names columns_required_for_sampling; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 39665f03c84..807c103772d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -17,8 +18,14 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i table_ttl.update(other_infos.table_ttl); updatePartMinMaxTTL(table_ttl.min, table_ttl.max); + + for (const auto & [expression, ttl_info] : other_infos.moves_ttl) + { + moves_ttl[expression].update(ttl_info); + } } + void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) { String json_str; @@ -48,8 +55,21 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) updatePartMinMaxTTL(table_ttl.min, table_ttl.max); } + if (json.has("moves")) + { + JSON moves = json["moves"]; + for (auto move : moves) + { + MergeTreeDataPartTTLInfo ttl_info; + ttl_info.min = move["min"].getUInt(); + ttl_info.max = move["max"].getUInt(); + String expression = move["expression"].getString(); + moves_ttl.emplace(expression, ttl_info); + } + } } + void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const { writeString("ttl format version: 1\n", out); @@ -62,9 +82,9 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const if (it != columns_ttl.begin()) writeString(",", out); - writeString("{\"name\":\"", out); - writeString(it->first, out); - writeString("\",\"min\":", out); + writeString("{\"name\":", out); + writeString(doubleQuoteString(it->first), out); + writeString(",\"min\":", out); writeIntText(it->second.min, out); writeString(",\"max\":", out); writeIntText(it->second.max, out); @@ -82,6 +102,26 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeIntText(table_ttl.max, out); writeString("}", out); } + if (!moves_ttl.empty()) + { + if (!columns_ttl.empty() || table_ttl.min) + writeString(",", out); + writeString("\"moves\":[", out); + for (auto it = moves_ttl.begin(); it != moves_ttl.end(); ++it) + { + if (it != moves_ttl.begin()) + writeString(",", out); + + writeString("{\"expression\":", out); + writeString(doubleQuoteString(it->first), out); + writeString(",\"min\":", out); + writeIntText(it->second.min, out); + writeString(",\"max\":", out); + writeIntText(it->second.max, out); + writeString("}", out); + } + writeString("]", out); + } writeString("}", out); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 71a7c9f602f..8ad2e256fa7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -38,6 +38,8 @@ struct MergeTreeDataPartTTLInfos time_t part_min_ttl = 0; time_t part_max_ttl = 0; + std::unordered_map moves_ttl; + void read(ReadBuffer & in); void write(WriteBuffer & out) const; void update(const MergeTreeDataPartTTLInfos & other_infos); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 
cb76215897a..ae079a911ab 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -75,13 +75,11 @@ void buildScatterSelector( } /// Computes ttls and updates ttl infos -void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, Block & block, const String & column_name) +void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, Block & block) { if (!block.has(ttl_entry.result_column)) ttl_entry.expression->execute(block); - auto & ttl_info = (column_name.empty() ? ttl_infos.table_ttl : ttl_infos.columns_ttl[column_name]); - const auto & current = block.getByName(ttl_entry.result_column); const IColumn * column = current.column.get(); @@ -251,7 +249,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks); - /// Sort. + /// Sort IColumn::Permutation * perm_ptr = nullptr; IColumn::Permutation perm; if (!sort_description.empty()) @@ -266,10 +264,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa } if (data.hasTableTTL()) - updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, block, ""); + updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block); for (const auto & [name, ttl_entry] : data.ttl_entries_by_name) - updateTTL(ttl_entry, new_data_part->ttl_infos, block, name); + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block); + + for (const auto & [expression, ttl_entry] : data.move_ttl_entries_by_name) + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.moves_ttl[expression], block); /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. From 893e0de37af8b2eec393e5158d8a976d8d082dbd Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 17 Oct 2019 21:55:07 +0300 Subject: [PATCH 05/51] Minor fix. --- dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h | 5 +++++ dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 8ad2e256fa7..e53f49f5205 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -52,6 +52,11 @@ struct MergeTreeDataPartTTLInfos if (time_max && (!part_max_ttl || time_max > part_max_ttl)) part_max_ttl = time_max; } + + bool empty() + { + return !part_min_ttl && moves_ttl.empty(); + } }; } diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9e33b4594f3..27127c29128 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -186,7 +186,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( checksums.files["count.txt"].file_hash = count_out_hashing.getHash(); } - if (new_part->ttl_infos.part_min_ttl) + if (!new_part->ttl_infos.empty()) { /// Write a file with ttl infos in json format. 
WriteBufferFromFile out(part_path + "ttl.txt", 4096); From 575de5ada6c41e913adf9cc3ee4b107cc3705ff7 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 22 Oct 2019 10:55:36 +0300 Subject: [PATCH 06/51] Attempt to add background moves by TTL expressions. --- dbms/src/DataStreams/TTLBlockInputStream.cpp | 25 +++++++ dbms/src/DataStreams/TTLBlockInputStream.h | 3 + dbms/src/Storages/MergeTree/MergeTreeData.cpp | 30 ++++---- dbms/src/Storages/MergeTree/MergeTreeData.h | 12 +++- .../MergeTree/MergeTreeDataWriter.cpp | 3 +- .../MergeTree/MergeTreePartsMover.cpp | 68 +++++++++++++++++-- 6 files changed, 118 insertions(+), 23 deletions(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 02191aec52d..884318b4b06 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -85,6 +85,8 @@ Block TTLBlockInputStream::readImpl() removeValuesWithExpiredColumnTTL(block); + updateMovesTTL(block); + return block; } @@ -197,6 +199,29 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) block.erase(elem.second.result_column); } +void TTLBlockInputStream::updateMovesTTL(Block & block) +{ + for (const auto & [name, ttl_entry] : storage.move_ttl_entries_by_name) + { + auto & new_ttl_info = new_ttl_infos.moves_ttl[name]; + + if (!block.has(ttl_entry.result_column)) + ttl_entry.expression->execute(block); + + const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get(); + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + new_ttl_info.update(cur_ttl); + } + } + + for (const auto & elem : storage.move_ttl_entries_by_name) + if (block.has(elem.second.result_column)) + block.erase(elem.second.result_column); +} + UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind) { if (const ColumnUInt16 * column_date = typeid_cast(column)) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index 5ed6aa9e520..05893600fe6 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -58,6 +58,9 @@ private: /// Removes rows with expired table ttl and computes new ttl_infos for part void removeRowsWithExpiredTableTTL(Block & block); + /// Updates TTL for moves + void updateMovesTTL(Block & block); + UInt32 getTimestampByIndex(const IColumn * column, size_t ind); bool isTTLExpired(time_t ttl); }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 90720d428ed..c010c9eb60c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -604,41 +604,41 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new if (new_ttl_table_ast) { - ASTPtr new_delete_ttl_table_ast; + bool seen_delete_ttl = false; for (auto ttl_element_ptr : new_ttl_table_ast->children) { ASTTTLElement & ttl_element = static_cast(*ttl_element_ptr); if (ttl_element.destination_type == ASTTTLElement::DELETE) { - if (new_delete_ttl_table_ast) + if (seen_delete_ttl) { throw Exception("Too many DELETE ttls", ErrorCodes::BAD_TTL_EXPRESSION); } - new_delete_ttl_table_ast = ttl_element.children[0]; + + auto new_ttl_table_entry = create_ttl_entry(ttl_element.children[0]); + if (!only_check) + { + ttl_table_ast = ttl_element.children[0]; + ttl_table_entry = new_ttl_table_entry; + } + + seen_delete_ttl 
= true; } else { auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); if (!only_check) { - std::ostringstream expression_text_stream; - IAST::FormatSettings settings(expression_text_stream, true); - ttl_element.children[0]->format(settings); - move_ttl_entries_by_name.emplace(expression_text_stream.str(), new_ttl_entry); - /// FIXME: Save TTLElement type and destination somehow. + MoveTTLEntry entry = { new_ttl_entry.expression, new_ttl_entry.result_column, ttl_element.destination_type, ttl_element.destination_name }; + move_ttl_entries_by_name.emplace(new_ttl_entry.result_column, entry); } } } - - auto new_ttl_table_entry = create_ttl_entry(new_delete_ttl_table_ast); - if (!only_check) - { - ttl_table_ast = new_delete_ttl_table_ast; - ttl_table_entry = new_ttl_table_entry; - } } // FIXME: In case of ALTER one need to clean up previous values to actually set expression but not merge them. + // TODO: Check if ALTER MODIFY replaces TTL + // TODO: Check if ALTER MODIFY COLUMN + TTL works well (changing of name works with ttl) } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 734b52209da..7398114993b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -726,7 +727,16 @@ public: TTLEntry ttl_table_entry; - TTLEntriesByName move_ttl_entries_by_name; + struct MoveTTLEntry + { + ExpressionActionsPtr expression; + String result_column; + ASTTTLElement::DestinationType destination_type; + String destination_name; + }; + + using MoveTTLEntriesByName = std::unordered_map; + MoveTTLEntriesByName move_ttl_entries_by_name; String sampling_expr_column_name; Names columns_required_for_sampling; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index ae079a911ab..cb48e13ffc1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -75,7 +75,8 @@ void buildScatterSelector( } /// Computes ttls and updates ttl infos -void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, Block & block) +template +void updateTTL(const TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, Block & block) { if (!block.has(ttl_entry.result_column)) ttl_entry.expression->execute(block); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 9076c053900..405a3b0c3ce 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -94,14 +94,16 @@ bool MergeTreePartsMover::selectPartsForMove( { for (const auto & disk : volumes[i]->disks) { - UInt64 required_available_space = disk->getTotalSpace() * policy->getMoveFactor(); + UInt64 required_maximum_available_space = disk->getTotalSpace() * policy->getMoveFactor(); UInt64 unreserved_space = disk->getUnreservedSpace(); - if (required_available_space > unreserved_space) - need_to_move.emplace(disk, required_available_space - unreserved_space); + if (unreserved_space < required_maximum_available_space) + need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); } } + auto current_time = time(nullptr); + for (const auto & part : data_parts) { String reason; @@ 
-109,6 +111,60 @@ bool MergeTreePartsMover::selectPartsForMove( if (!can_move(part, &reason)) continue; + const auto ttl_entries_end = part->storage.move_ttl_entries_by_name.end(); + auto best_ttl_entry_it = ttl_entries_end; + time_t max_max_ttl = 0; + for (auto & [name, ttl_info] : part->ttl_infos.moves_ttl) + { + auto move_ttl_entry_it = part->storage.move_ttl_entries_by_name.find(name); + if (move_ttl_entry_it != part->storage.move_ttl_entries_by_name.end()) + { + if (ttl_info.max < current_time && max_max_ttl < ttl_info.max) + { + best_ttl_entry_it = move_ttl_entry_it; + max_max_ttl = ttl_info.max; + } + } + } + if (best_ttl_entry_it != ttl_entries_end) + { + auto & move_ttl_entry = best_ttl_entry_it->second; + if (move_ttl_entry.destination_type == ASTTTLElement::DestinationType::VOLUME) + { + auto volume_ptr = policy->getVolumeByName(move_ttl_entry.destination_name); + if (volume_ptr) + { + auto reservation = volume_ptr->reserve(part->bytes_on_disk); + if (reservation) + { + parts_to_move.emplace_back(part, std::move(reservation)); + continue; + } + } + else + { + /// FIXME: log error + } + } + else if (move_ttl_entry.destination_type == ASTTTLElement::DestinationType::DISK) + { + auto disk_ptr = policy->getDiskByName(move_ttl_entry.destination_name); + if (disk_ptr) + { + auto reservation = disk_ptr->reserve(part->bytes_on_disk); + if (reservation) + { + parts_to_move.emplace_back(part, std::move(reservation)); + continue; + } + } + else + { + /// FIXME: log error + } + } + } + auto to_insert = need_to_move.find(part->disk); if (to_insert != need_to_move.end()) to_insert->second.add(part); @@ -116,13 +172,13 @@ bool MergeTreePartsMover::selectPartsForMove( for (auto && move : need_to_move) { - auto min_volume_priority = policy->getVolumeIndexByDisk(move.first) + 1; + auto min_volume_index = policy->getVolumeIndexByDisk(move.first) + 1; for (auto && part : move.second.getAccumulatedParts()) { - auto reservation = policy->reserve(part->bytes_on_disk, min_volume_priority); + auto reservation = policy->reserve(part->bytes_on_disk, min_volume_index); if (!reservation) { - /// Next parts to move from this disk has greater size and same min volume priority + /// Next parts to move from this disk has greater size and same min volume index /// There are no space for them /// But it can be possible to move data from other disks break; From 4cd75f926bdca9314f49e1ede6915cc74f583bbc Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 28 Oct 2019 14:09:50 +0300 Subject: [PATCH 07/51] Fixed 00933 ttl tests. 
---
 dbms/tests/queries/0_stateless/00933_alter_ttl.reference | 2 +-
 .../0_stateless/00933_ttl_replicated_zookeeper.reference | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00933_alter_ttl.reference b/dbms/tests/queries/0_stateless/00933_alter_ttl.reference
index 44ba49026a7..8262932df48 100644
--- a/dbms/tests/queries/0_stateless/00933_alter_ttl.reference
+++ b/dbms/tests/queries/0_stateless/00933_alter_ttl.reference
@@ -1,4 +1,4 @@
-CREATE TABLE default.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192
+CREATE TABLE default.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) DELETE SETTINGS index_granularity = 8192
 2100-10-10 3
 2100-10-10 4
 d Date
diff --git a/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference
index 986bc6b4a24..76f44f98311 100644
--- a/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference
+++ b/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference
@@ -1,3 +1,3 @@
 200
 400
-CREATE TABLE test.ttl_repl2 (`d` Date, `x` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\') PARTITION BY toDayOfMonth(d) ORDER BY x TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192
+CREATE TABLE test.ttl_repl2 (`d` Date, `x` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\') PARTITION BY toDayOfMonth(d) ORDER BY x TTL d + toIntervalDay(1) DELETE SETTINGS index_granularity = 8192

From 4cd75f926bdca9314f49e1ede6915cc74f583bbc Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Tue, 29 Oct 2019 02:01:07 +0300
Subject: [PATCH 08/51] Fixed ttl move logic in background move task and added ttl enforcement on inserts.
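writeTempPart() now evaluates every move-TTL expression for the inserted block
up front and tries to reserve space on the destination disk or volume of the
best matching rule, falling back to the ordinary storage-policy reservation;
the background mover is likewise switched from comparing the rule's max
timestamp to its min. A sketch of a rule this path serves (the disk name is
illustrative):

    TTL d + INTERVAL 1 DAY TO DISK 'external'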
--- .../MergeTree/MergeTreeDataWriter.cpp | 55 +++++++++++++++++-- .../MergeTree/MergeTreePartsMover.cpp | 10 ++-- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cb48e13ffc1..3e89be98d17 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -211,10 +211,56 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa else part_name = new_part_info.getPartName(); - /// Size of part would not be grater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - auto reservation = data.reserveSpace(expected_size); + auto current_time = time(nullptr); + const MergeTreeData::MoveTTLEntry * best_ttl_entry = nullptr; + time_t max_min_ttl = 0; + DB::MergeTreeDataPart::TTLInfos move_ttl_infos; + for (const auto & [expression, ttl_entry] : data.move_ttl_entries_by_name) + { + auto & ttl_info = move_ttl_infos.moves_ttl[expression]; + updateTTL(ttl_entry, move_ttl_infos, ttl_info, block); + if (ttl_info.min > current_time && max_min_ttl < ttl_info.min) + { + best_ttl_entry = &ttl_entry; + max_min_ttl = ttl_info.min; + } + } + + DiskSpace::ReservationPtr reservation; + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + if (best_ttl_entry != nullptr) + { + if (best_ttl_entry->destination_type == ASTTTLElement::DestinationType::VOLUME) + { + auto volume_ptr = data.getStoragePolicy()->getVolumeByName(best_ttl_entry->destination_name); + if (volume_ptr) + { + reservation = volume_ptr->reserve(expected_size); + } + else + { + /// FIXME: log warning + } + } + else if (best_ttl_entry->destination_type == ASTTTLElement::DestinationType::DISK) + { + auto disk_ptr = data.getStoragePolicy()->getDiskByName(best_ttl_entry->destination_name); + if (disk_ptr) + { + reservation = disk_ptr->reserve(expected_size); + } + else + { + /// FIXME: log warning + } + } + } + if (!reservation) + { + reservation = data.reserveSpace(expected_size); + } MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, reservation->getDisk(), part_name, new_part_info); @@ -270,8 +316,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & [name, ttl_entry] : data.ttl_entries_by_name) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block); - for (const auto & [expression, ttl_entry] : data.move_ttl_entries_by_name) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.moves_ttl[expression], block); + new_data_part->ttl_infos.update(move_ttl_infos); /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. 
diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 405a3b0c3ce..69ef7d88602 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -113,16 +113,16 @@ bool MergeTreePartsMover::selectPartsForMove( const auto ttl_entries_end = part->storage.move_ttl_entries_by_name.end(); auto best_ttl_entry_it = ttl_entries_end; - time_t max_max_ttl = 0; + time_t max_min_ttl = 0; for (auto & [name, ttl_info] : part->ttl_infos.moves_ttl) { auto move_ttl_entry_it = part->storage.move_ttl_entries_by_name.find(name); if (move_ttl_entry_it != part->storage.move_ttl_entries_by_name.end()) { - if (ttl_info.max < current_time && max_max_ttl < ttl_info.max) + if (ttl_info.min > current_time && max_min_ttl < ttl_info.min) { best_ttl_entry_it = move_ttl_entry_it; - max_max_ttl = ttl_info.max; + max_min_ttl = ttl_info.min; } } } @@ -143,7 +143,7 @@ bool MergeTreePartsMover::selectPartsForMove( } else { - /// FIXME: log error + /// FIXME: log warning? } } else if (move_ttl_entry.destination_type == ASTTTLElement::DestinationType::DISK) @@ -160,7 +160,7 @@ bool MergeTreePartsMover::selectPartsForMove( } else { - /// FIXME: log error + /// FIXME: log warning? } } } From dfe0edc03f1ba4a8d8f239c96b8353eb659e68e3 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 29 Oct 2019 02:40:03 +0300 Subject: [PATCH 09/51] Style fix. --- dbms/src/Parsers/ExpressionElementParsers.cpp | 10 +++++----- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 865b39a9c1b..60378996d27 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1429,15 +1429,15 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTTTLElement::DestinationType destination_type = ASTTTLElement::DestinationType::DELETE; String destination_name; - if (s_to_disk.ignore(pos)) { + if (s_to_disk.ignore(pos)) destination_type = ASTTTLElement::DestinationType::DISK; - } else if (s_to_volume.ignore(pos)) { + else if (s_to_volume.ignore(pos)) destination_type = ASTTTLElement::DestinationType::VOLUME; - } else { + else s_delete.ignore(pos); - } - if (destination_type == ASTTTLElement::DestinationType::DISK || destination_type == ASTTTLElement::DestinationType::VOLUME) { + if (destination_type == ASTTTLElement::DestinationType::DISK || destination_type == ASTTTLElement::DestinationType::VOLUME) + { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 7398114993b..b7bbeb98812 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -732,7 +732,7 @@ public: ExpressionActionsPtr expression; String result_column; ASTTTLElement::DestinationType destination_type; - String destination_name; + String destination_name; }; using MoveTTLEntriesByName = std::unordered_map; From 58e824b6a188631c9a8f6e74ad944b26f8a7c08a Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 31 Oct 2019 13:40:11 +0300 Subject: [PATCH 10/51] Refactored, added move enforcement on merges, fixed a bug with wrong disk selection in `ReplicatedMergeTree`. 
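DestinationType is extracted from ASTTTLElement into the standalone
TTLDestinationType enum, and the duplicated reservation logic from the writer
and the mover is consolidated into MergeTreeData, which the writer now calls
as below; cloneAndLoadDataPartOnSameDisk() reserves through the table as well
instead of calling disk->reserve() directly, fixing the wrong disk selection
in ReplicatedMergeTree.

    DiskSpace::ReservationPtr reservation
        = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr));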
--- dbms/src/Parsers/ASTAlterQuery.cpp | 4 +- dbms/src/Parsers/ASTAlterQuery.h | 4 +- dbms/src/Parsers/ASTTTLElement.cpp | 7 +- dbms/src/Parsers/ASTTTLElement.h | 12 +- dbms/src/Parsers/ExpressionElementParsers.cpp | 8 +- dbms/src/Parsers/ParserAlterQuery.cpp | 8 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 132 ++++++++++++++++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 31 ++-- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 48 +------ .../MergeTree/MergeTreePartsMover.cpp | 119 ++++++---------- .../Storages/MergeTree/TTLDestinationType.h | 14 ++ dbms/src/Storages/PartitionCommands.cpp | 5 +- dbms/src/Storages/StorageMergeTree.cpp | 11 +- .../Storages/StorageReplicatedMergeTree.cpp | 19 +-- 15 files changed, 242 insertions(+), 182 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/TTLDestinationType.h diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 7a112ddc941..ce2900942be 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -176,10 +176,10 @@ void ASTAlterCommand::formatImpl( settings.ostr << " TO "; switch (move_destination_type) { - case ASTTTLElement::DestinationType::DISK: + case TTLDestinationType::DISK: settings.ostr << "DISK "; break; - case ASTTTLElement::DestinationType::VOLUME: + case TTLDestinationType::VOLUME: settings.ostr << "VOLUME "; break; default: diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 3699f3aee95..f3f06fdea98 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -129,9 +129,9 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN - ASTTTLElement::DestinationType move_destination_type; + TTLDestinationType move_destination_type; /// option for MOVE PART/PARTITION - String move_destination_name; + String move_destination_name; /// option for MOVE PART/PARTITION /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. 
*/ diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp index ec8e6ca5c44..7dc38ad92c0 100644 --- a/dbms/src/Parsers/ASTTTLElement.cpp +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -1,3 +1,4 @@ + #include #include #include @@ -9,15 +10,15 @@ namespace DB void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { children.front()->formatImpl(settings, state, frame); - if (destination_type == DestinationType::DISK) + if (destination_type == TTLDestinationType::DISK) { settings.ostr << " TO DISK " << quoteString(destination_name); } - else if (destination_type == DestinationType::VOLUME) + else if (destination_type == TTLDestinationType::VOLUME) { settings.ostr << " TO VOLUME " << quoteString(destination_name); } - else if (destination_type == DestinationType::DELETE) + else if (destination_type == TTLDestinationType::DELETE) { settings.ostr << " DELETE"; } diff --git a/dbms/src/Parsers/ASTTTLElement.h b/dbms/src/Parsers/ASTTTLElement.h index b0b0991c6fb..969740e2da3 100644 --- a/dbms/src/Parsers/ASTTTLElement.h +++ b/dbms/src/Parsers/ASTTTLElement.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -10,17 +11,10 @@ namespace DB class ASTTTLElement : public IAST { public: - enum DestinationType - { - DISK, - VOLUME, - DELETE, - }; - - DestinationType destination_type; + TTLDestinationType destination_type; String destination_name; - ASTTTLElement(DestinationType destination_type_, const String & destination_name_) + ASTTTLElement(TTLDestinationType destination_type_, const String & destination_name_) : destination_type(destination_type_) , destination_name(destination_name_) { diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 60378996d27..55e3ef42d28 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1427,16 +1427,16 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_exp.parse(pos, expr_elem, expected)) return false; - ASTTTLElement::DestinationType destination_type = ASTTTLElement::DestinationType::DELETE; + TTLDestinationType destination_type = TTLDestinationType::DELETE; String destination_name; if (s_to_disk.ignore(pos)) - destination_type = ASTTTLElement::DestinationType::DISK; + destination_type = TTLDestinationType::DISK; else if (s_to_volume.ignore(pos)) - destination_type = ASTTTLElement::DestinationType::VOLUME; + destination_type = TTLDestinationType::VOLUME; else s_delete.ignore(pos); - if (destination_type == ASTTTLElement::DestinationType::DISK || destination_type == ASTTTLElement::DestinationType::VOLUME) + if (destination_type == TTLDestinationType::DISK || destination_type == TTLDestinationType::VOLUME) { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 9cc53c7612f..b097cb72085 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -237,9 +237,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; if (s_to_disk.ignore(pos)) - command->move_destination_type = ASTTTLElement::DestinationType::DISK; + command->move_destination_type = TTLDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = ASTTTLElement::DestinationType::VOLUME; + 
command->move_destination_type = TTLDestinationType::VOLUME; else return false; @@ -259,9 +259,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MOVE_PARTITION; if (s_to_disk.ignore(pos)) - command->move_destination_type = ASTTTLElement::DestinationType::DISK; + command->move_destination_type = TTLDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = ASTTTLElement::DestinationType::VOLUME; + command->move_destination_type = TTLDestinationType::VOLUME; else return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index c010c9eb60c..b81661d0362 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -70,6 +70,12 @@ namespace CurrentMetrics } +namespace +{ + constexpr UInt64 RESERVATION_MIN_ESTIMATION_SIZE = 1u * 1024u * 1024u; /// 1MB +} + + namespace DB { @@ -608,11 +614,11 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new for (auto ttl_element_ptr : new_ttl_table_ast->children) { ASTTTLElement & ttl_element = static_cast(*ttl_element_ptr); - if (ttl_element.destination_type == ASTTTLElement::DELETE) + if (ttl_element.destination_type == TTLDestinationType::DELETE) { if (seen_delete_ttl) { - throw Exception("Too many DELETE ttls", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); } auto new_ttl_table_entry = create_ttl_entry(ttl_element.children[0]); @@ -629,7 +635,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); if (!only_check) { - MoveTTLEntry entry = { new_ttl_entry.expression, new_ttl_entry.result_column, ttl_element.destination_type, ttl_element.destination_name }; + TTLEntry entry{new_ttl_entry.expression, new_ttl_entry.result_column, ttl_element.destination_type, ttl_element.destination_name}; move_ttl_entries_by_name.emplace(new_ttl_entry.result_column, entry); } } @@ -3124,18 +3130,100 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const return loaded_parts; } -DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) +namespace { - constexpr UInt64 RESERVATION_MIN_ESTIMATION_SIZE = 1u * 1024u * 1024u; /// 1MB +inline DiskSpace::ReservationPtr throwNotEnoughSpace(UInt64 expected_size) +{ + throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(expected_size) + ", not enough space", + ErrorCodes::NOT_ENOUGH_SPACE); + return {}; +} + +} + +DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const +{ expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); auto reservation = storage_policy->reserve(expected_size); if (reservation) return reservation; - throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(expected_size) + ", not enough space.", - ErrorCodes::NOT_ENOUGH_SPACE); + return throwNotEnoughSpace(expected_size); +} + +DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + auto reservation = tryReserveSpaceOnMoveDestination(expected_size, ttl_infos, minimum_time); + if (reservation) + return reservation; + + 
reservation = storage_policy->reserve(expected_size); + if (reservation) + return reservation; + + return throwNotEnoughSpace(expected_size); +} + +DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceOnMoveDestination(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + auto ttl_entry = selectMoveDestination(ttl_infos, minimum_time); + if (ttl_entry != nullptr) + { + DiskSpace::ReservationPtr reservation; + if (ttl_entry->destination_type == TTLDestinationType::VOLUME) + { + auto volume_ptr = storage_policy->getVolumeByName(ttl_entry->destination_name); + if (volume_ptr) + { + reservation = volume_ptr->reserve(expected_size); + } + else + { + LOG_WARNING(log, "Would like to reserve space on volume '" + << ttl_entry->destination_name << "' by TTL rule of table '" + << log_name << "' but volume was not found"); + } + } + else if (ttl_entry->destination_type == TTLDestinationType::DISK) + { + auto disk_ptr = storage_policy->getDiskByName(ttl_entry->destination_name); + if (disk_ptr) + { + reservation = disk_ptr->reserve(expected_size); + } + else + { + LOG_WARNING(log, "Would like to reserve space on disk '" + << ttl_entry->destination_name << "' by TTL rule of table '" + << log_name << "' but disk was not found"); + } + } + if (reservation) + return reservation; + } + + return {}; +} + +DiskSpace::ReservationPtr MergeTreeData::reserveSpaceOnSpecificDisk(UInt64 expected_size, DiskSpace::DiskPtr disk) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + auto reservation = disk->reserve(expected_size); + if (reservation) + return reservation; + + return throwNotEnoughSpace(expected_size); } MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const @@ -3317,12 +3405,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( String dst_part_name = src_part->getNewName(dst_part_info); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; - auto reservation = src_part->disk->reserve(src_part->bytes_on_disk); - if (!reservation) - { - throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(src_part->bytes_on_disk) + ", not enough space", - ErrorCodes::NOT_ENOUGH_SPACE); - } + auto reservation = reserveSpaceOnSpecificDisk(src_part->bytes_on_disk, src_part->disk); String dst_part_path = getFullPathOnDisk(reservation->getDisk()); Poco::Path dst_part_absolute_path = Poco::Path(dst_part_path + tmp_dst_part_name).absolute(); Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute(); @@ -3645,4 +3728,27 @@ bool MergeTreeData::moveParts(CurrentlyMovingPartsTagger && moving_tagger) return true; } +const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const +{ + const MergeTreeData::TTLEntry * result = nullptr; + /// Prefer TTL rule which went into action last. 
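+    /// (i.e. among the move rules recorded in ttl_infos, the entry with the
+    /// greatest `min` timestamp not older than `minimum_time` wins)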
+ time_t max_min_ttl = 0; + + for (const auto & [expression, ttl_entry] : move_ttl_entries_by_name) + { + auto ttl_info_it = ttl_infos.moves_ttl.find(expression); + if (ttl_info_it != ttl_infos.moves_ttl.end() + && ttl_info_it->second.min >= minimum_time + && max_min_ttl <= ttl_info_it->second.min) + { + result = &ttl_entry; + max_min_ttl = ttl_info_it->second.min; + } + } + + return result; +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index b7bbeb98812..8bc1781c695 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -1,15 +1,16 @@ #pragma once #include +#include #include #include -#include #include #include #include #include #include #include +#include #include #include #include @@ -675,7 +676,14 @@ public: PathsWithDisks getDataPathsWithDisks() const; /// Reserves space at least 1MB - DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size); + DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size) const; + DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const; + DiskSpace::ReservationPtr tryReserveSpaceOnMoveDestination(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const; + DiskSpace::ReservationPtr reserveSpaceOnSpecificDisk(UInt64 expected_size, DiskSpace::DiskPtr disk) const; /// Choose disk with max available free space /// Reserves 0 bytes @@ -720,23 +728,17 @@ public: { ExpressionActionsPtr expression; String result_column; + + /// Name and type of a destination are only valid in table-level context. + TTLDestinationType destination_type; + String destination_name; }; using TTLEntriesByName = std::unordered_map; TTLEntriesByName ttl_entries_by_name; TTLEntry ttl_table_entry; - - struct MoveTTLEntry - { - ExpressionActionsPtr expression; - String result_column; - ASTTTLElement::DestinationType destination_type; - String destination_name; - }; - - using MoveTTLEntriesByName = std::unordered_map; - MoveTTLEntriesByName move_ttl_entries_by_name; + TTLEntriesByName move_ttl_entries_by_name; String sampling_expr_column_name; Names columns_required_for_sampling; @@ -975,6 +977,9 @@ private: /// Check selected parts for movements. Used by ALTER ... MOVE queries. 
CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, DiskSpace::SpacePtr space); + + const MergeTreeData::TTLEntry * selectMoveDestination(const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t minimum_time) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 807c103772d..f6dc9b6fde2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -58,7 +58,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) if (json.has("moves")) { JSON moves = json["moves"]; - for (auto move : moves) + for (const auto move : moves) { MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = move["min"].getUInt(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 3e89be98d17..bf1425422f3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -211,56 +211,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa else part_name = new_part_info.getPartName(); - auto current_time = time(nullptr); - const MergeTreeData::MoveTTLEntry * best_ttl_entry = nullptr; - time_t max_min_ttl = 0; + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); DB::MergeTreeDataPart::TTLInfos move_ttl_infos; for (const auto & [expression, ttl_entry] : data.move_ttl_entries_by_name) { - auto & ttl_info = move_ttl_infos.moves_ttl[expression]; - updateTTL(ttl_entry, move_ttl_infos, ttl_info, block); - if (ttl_info.min > current_time && max_min_ttl < ttl_info.min) - { - best_ttl_entry = &ttl_entry; - max_min_ttl = ttl_info.min; - } + updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[expression], block); } - DiskSpace::ReservationPtr reservation; - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - if (best_ttl_entry != nullptr) - { - if (best_ttl_entry->destination_type == ASTTTLElement::DestinationType::VOLUME) - { - auto volume_ptr = data.getStoragePolicy()->getVolumeByName(best_ttl_entry->destination_name); - if (volume_ptr) - { - reservation = volume_ptr->reserve(expected_size); - } - else - { - /// FIXME: log warning - } - } - else if (best_ttl_entry->destination_type == ASTTTLElement::DestinationType::DISK) - { - auto disk_ptr = data.getStoragePolicy()->getDiskByName(best_ttl_entry->destination_name); - if (disk_ptr) - { - reservation = disk_ptr->reserve(expected_size); - } - else - { - /// FIXME: log warning - } - } - } - if (!reservation) - { - reservation = data.reserveSpace(expected_size); - } + DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr)); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, reservation->getDisk(), part_name, new_part_info); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 69ef7d88602..5772ff6079f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -52,11 +52,14 @@ public: elems.emplace(part); current_size_sum += part->bytes_on_disk; - while (!elems.empty() && (current_size_sum - (*elems.begin())->bytes_on_disk >= required_size_sum)) - { - current_size_sum -= 
(*elems.begin())->bytes_on_disk; - elems.erase(elems.begin()); - } + removeRedundantElements(); + } + + /// Weaken requirements on size + void decreaseRequiredSize(UInt64 size_decrease) + { + required_size_sum -= std::min(size_decrease, required_size_sum); + removeRedundantElements(); } /// Returns parts ordered by size @@ -67,6 +70,16 @@ public: res.push_back(elem); return res; } + +private: + void removeRedundantElements() + { + while (!elems.empty() && (current_size_sum - (*elems.begin())->bytes_on_disk >= required_size_sum)) + { + current_size_sum -= (*elems.begin())->bytes_on_disk; + elems.erase(elems.begin()); + } + } }; } @@ -85,25 +98,22 @@ bool MergeTreePartsMover::selectPartsForMove( const auto & policy = data->getStoragePolicy(); const auto & volumes = policy->getVolumes(); - /// Do not check if policy has one volume - if (volumes.size() == 1) - return false; - - /// Do not check last volume - for (size_t i = 0; i != volumes.size() - 1; ++i) + if (volumes.size() > 0) { - for (const auto & disk : volumes[i]->disks) + /// Do not check last volume + for (size_t i = 0; i != volumes.size() - 1; ++i) { - UInt64 required_maximum_available_space = disk->getTotalSpace() * policy->getMoveFactor(); - UInt64 unreserved_space = disk->getUnreservedSpace(); + for (const auto & disk : volumes[i]->disks) + { + UInt64 required_maximum_available_space = disk->getTotalSpace() * policy->getMoveFactor(); + UInt64 unreserved_space = disk->getUnreservedSpace(); - if (unreserved_space < required_maximum_available_space) - need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); + if (unreserved_space < required_maximum_available_space) + need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); + } } } - auto current_time = time(nullptr); - for (const auto & part : data_parts) { String reason; @@ -111,63 +121,24 @@ bool MergeTreePartsMover::selectPartsForMove( if (!can_move(part, &reason)) continue; - const auto ttl_entries_end = part->storage.move_ttl_entries_by_name.end(); - auto best_ttl_entry_it = ttl_entries_end; - time_t max_min_ttl = 0; - for (auto & [name, ttl_info] : part->ttl_infos.moves_ttl) - { - auto move_ttl_entry_it = part->storage.move_ttl_entries_by_name.find(name); - if (move_ttl_entry_it != part->storage.move_ttl_entries_by_name.end()) - { - if (ttl_info.min > current_time && max_min_ttl < ttl_info.min) - { - best_ttl_entry_it = move_ttl_entry_it; - max_min_ttl = ttl_info.min; - } - } - } - if (best_ttl_entry_it != ttl_entries_end) - { - auto & move_ttl_entry = best_ttl_entry_it->second; - if (move_ttl_entry.destination_type == ASTTTLElement::DestinationType::VOLUME) - { - auto volume_ptr = policy->getVolumeByName(move_ttl_entry.destination_name); - if (volume_ptr) - { - auto reservation = volume_ptr->reserve(part->bytes_on_disk); - if (reservation) - { - parts_to_move.emplace_back(part, std::move(reservation)); - continue; - } - } - else - { - /// FIXME: log warning? - } - } - else if (move_ttl_entry.destination_type == ASTTTLElement::DestinationType::DISK) - { - auto disk_ptr = policy->getDiskByName(move_ttl_entry.destination_name); - if (disk_ptr) - { - auto reservation = disk_ptr->reserve(part->bytes_on_disk); - if (reservation) - { - parts_to_move.emplace_back(part, std::move(reservation)); - continue; - } - } - else - { - /// FIXME: log warning? 
-                }
-            }
-        }
-
+        auto reservation = part->storage.tryReserveSpaceOnMoveDestination(part->bytes_on_disk, part->ttl_infos, time(nullptr));
         auto to_insert = need_to_move.find(part->disk);
-        if (to_insert != need_to_move.end())
-            to_insert->second.add(part);
+        if (reservation)
+        {
+            parts_to_move.emplace_back(part, std::move(reservation));
+            /// If a table TTL rule applies to this part, policy move rules are not applied to it.
+            /// To avoid over-moving, "release" the required space on this disk, possibly down to zero.
+            if (to_insert != need_to_move.end())
+            {
+                to_insert->second.decreaseRequiredSize(part->bytes_on_disk);
+            }
+        }
+        else
+        {
+            if (to_insert != need_to_move.end())
+                to_insert->second.add(part);
+        }
     }
 
     for (auto && move : need_to_move)
diff --git a/dbms/src/Storages/MergeTree/TTLDestinationType.h b/dbms/src/Storages/MergeTree/TTLDestinationType.h
new file mode 100644
index 00000000000..5825268d7a4
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/TTLDestinationType.h
@@ -0,0 +1,14 @@
+#pragma once
+
+
+namespace DB
+{
+
+enum class TTLDestinationType
+{
+    DISK,
+    VOLUME,
+    DELETE,
+};
+
+}
diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp
index 62d8b4a488f..c52fc092741 100644
--- a/dbms/src/Storages/PartitionCommands.cpp
+++ b/dbms/src/Storages/PartitionCommands.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include <Storages/MergeTree/TTLDestinationType.h>
 #include
 #include
 
@@ -47,10 +48,10 @@ std::optional<PartitionCommand> PartitionCommand::parse(const ASTAlterCommand *
         res.part = command_ast->part;
         switch (command_ast->move_destination_type)
         {
-            case ASTTTLElement::DestinationType::DISK:
+            case TTLDestinationType::DISK:
                 res.move_destination_type = PartitionCommand::MoveDestinationType::DISK;
                 break;
-            case ASTTTLElement::DestinationType::VOLUME:
+            case TTLDestinationType::VOLUME:
                 res.move_destination_type = PartitionCommand::MoveDestinationType::VOLUME;
                 break;
             default:
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index ee40e254f4d..0de70784328 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -350,9 +350,16 @@ public:
 
         /// if we mutate a part, then we should reserve space on the same disk, because mutations can possibly create hardlinks
         if (is_mutation)
-            reserved_space = future_part_.parts[0]->disk->reserve(total_size);
+            reserved_space = storage.reserveSpaceOnSpecificDisk(total_size, future_part_.parts[0]->disk);
         else
+        {
+            MergeTreeDataPart::TTLInfos ttl_infos;
+            for (auto & part_ptr : future_part_.parts)
+            {
+                ttl_infos.update(part_ptr->ttl_infos);
+            }
+            reserved_space = storage.reserveSpacePreferringMoveDestination(total_size, ttl_infos, time(nullptr));
+        }
         if (!reserved_space)
         {
             if (is_mutation)
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 61f44628a3b..b1e2fe01a56 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1005,8 +1005,14 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
     /// Start to make the main work
     size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts);
 
-    /// Can throw an exception.
-    DiskSpace::ReservationPtr reserved_space = reserveSpace(estimated_space_for_merge);
+    /// Can throw an exception while reserving space.
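+    /// Merge the TTL infos of all source parts, so that the reservation can prefer
+    /// the TTL move destination that will apply to the future merged part.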
+ MergeTreeDataPart::TTLInfos ttl_infos; + for (auto & part_ptr : parts) + { + ttl_infos.update(part_ptr->ttl_infos); + } + DiskSpace::ReservationPtr reserved_space = reserveSpacePreferringMoveDestination(estimated_space_for_merge, + ttl_infos, time(nullptr)); auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); @@ -1139,14 +1145,9 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM entry.new_part_name, format_version); MutationCommands commands = queue.getMutationCommands(source_part, new_part_info.mutation); - /// Can throw an exception. /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. - DiskSpace::ReservationPtr reserved_space = source_part->disk->reserve(estimated_space_for_result); - if (!reserved_space) - { - throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(estimated_space_for_result) + ", not enough space", - ErrorCodes::NOT_ENOUGH_SPACE); - } + /// Can throw an exception. + DiskSpace::ReservationPtr reserved_space = reserveSpaceOnSpecificDisk(estimated_space_for_result, source_part->disk); auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); From 773bf7b35ce840637ca3f596f4495f67bdabdcf9 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 31 Oct 2019 16:50:56 +0300 Subject: [PATCH 11/51] Removed 'DELETE' keyword from default TTL syntax description. --- dbms/src/Parsers/ASTTTLElement.cpp | 2 +- dbms/tests/queries/0_stateless/00933_alter_ttl.reference | 2 +- .../0_stateless/00933_ttl_replicated_zookeeper.reference | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp index 7dc38ad92c0..c746b957b0f 100644 --- a/dbms/src/Parsers/ASTTTLElement.cpp +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -20,7 +20,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st } else if (destination_type == TTLDestinationType::DELETE) { - settings.ostr << " DELETE"; + /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. 
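+        /// (Formatted table definitions are stored in metadata and parsed back on load,
+        /// and older servers do not understand "DELETE" in this position of the TTL clause.)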
} } diff --git a/dbms/tests/queries/0_stateless/00933_alter_ttl.reference b/dbms/tests/queries/0_stateless/00933_alter_ttl.reference index 8262932df48..44ba49026a7 100644 --- a/dbms/tests/queries/0_stateless/00933_alter_ttl.reference +++ b/dbms/tests/queries/0_stateless/00933_alter_ttl.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) DELETE SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192 2100-10-10 3 2100-10-10 4 d Date diff --git a/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference index 76f44f98311..986bc6b4a24 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference @@ -1,3 +1,3 @@ 200 400 -CREATE TABLE test.ttl_repl2 (`d` Date, `x` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\') PARTITION BY toDayOfMonth(d) ORDER BY x TTL d + toIntervalDay(1) DELETE SETTINGS index_granularity = 8192 +CREATE TABLE test.ttl_repl2 (`d` Date, `x` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\') PARTITION BY toDayOfMonth(d) ORDER BY x TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192 From 342ea5c30aa42c0930ac8d95aa163d796f18a6a1 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 Nov 2019 08:50:05 +0300 Subject: [PATCH 12/51] Removed TODO comments. --- dbms/src/Parsers/ParserAlterQuery.cpp | 5 ----- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ---- 2 files changed, 9 deletions(-) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index b097cb72085..2eaba9e1e5f 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -243,8 +243,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else return false; - // FIXME See ParserTTLElement - ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; @@ -265,8 +263,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else return false; - // FIXME See ParserTTLElement - ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; @@ -437,7 +433,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else if (s_modify_ttl.ignore(pos, expected)) { if (!parser_ttl_list.parse(pos, command->ttl, expected)) -// FIXME check if that is fine, can be `toDate(), toDate() TO DISK 'abc'` and that is not tuple TO DISK 'abc' return false; command->type = ASTAlterCommand::MODIFY_TTL; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b81661d0362..6ea0d202883 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -641,10 +641,6 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new } } } - - // FIXME: In case of ALTER one need to clean up previous values to actually set expression but not merge them. 
- // TODO: Check if ALTER MODIFY replaces TTL - // TODO: Check if ALTER MODIFY COLUMN + TTL works well (changing of name works with ttl) } From e519c04bbe363ad57c2b0452e743f4b8d3e62ce2 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 Nov 2019 09:52:38 +0300 Subject: [PATCH 13/51] Minor fix of JSON handling in `MergeTreeDataPartTTLInfo`. --- .../Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index f6dc9b6fde2..a0b9bb5cf6d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -35,8 +35,8 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) JSON json(json_str); if (json.has("columns")) { - JSON columns = json["columns"]; - for (auto col : columns) + const JSON & columns = json["columns"]; + for (const auto & col : columns) { MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = col["min"].getUInt(); @@ -49,7 +49,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) } if (json.has("table")) { - JSON table = json["table"]; + const JSON & table = json["table"]; table_ttl.min = table["min"].getUInt(); table_ttl.max = table["max"].getUInt(); @@ -57,8 +57,8 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) } if (json.has("moves")) { - JSON moves = json["moves"]; - for (const auto move : moves) + const JSON & moves = json["moves"]; + for (const auto & move : moves) { MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = move["min"].getUInt(); From 3e3e9ac56f487c204650b0b5f8fe9c8546501823 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 Nov 2019 09:53:30 +0300 Subject: [PATCH 14/51] Moved settings for `BackgroundProcessingPool` to configuration. --- dbms/src/Interpreters/Context.cpp | 2 +- .../MergeTree/BackgroundProcessingPool.cpp | 24 +++++++++---------- .../MergeTree/BackgroundProcessingPool.h | 13 ++++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f303356be34..6437707ebeb 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1500,7 +1500,7 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool.emplace(settings.background_pool_size); + shared->background_pool.emplace(settings.background_pool_size, getConfigRef()); return *shared->background_pool; } diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index fa2b81a5eaa..9a4aa1d9dca 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -23,17 +23,6 @@ namespace CurrentMetrics namespace DB { -static constexpr double thread_sleep_seconds = 10; -static constexpr double thread_sleep_seconds_random_part = 1.0; -static constexpr double thread_sleep_seconds_if_nothing_to_do = 0.1; - -/// For exponential backoff. 
-static constexpr double task_sleep_seconds_when_no_work_min = 10; -static constexpr double task_sleep_seconds_when_no_work_max = 600; -static constexpr double task_sleep_seconds_when_no_work_multiplier = 1.1; -static constexpr double task_sleep_seconds_when_no_work_random_part = 1.0; - - void BackgroundProcessingPoolTaskInfo::wake() { Poco::Timestamp current_time; @@ -61,13 +50,24 @@ void BackgroundProcessingPoolTaskInfo::wake() } -BackgroundProcessingPool::BackgroundProcessingPool(int size_, const char * log_name, const char * thread_name_) +BackgroundProcessingPool::BackgroundProcessingPool(int size_, + const Poco::Util::AbstractConfiguration & config, + const char * log_name, + const char * thread_name_) : size(size_) , thread_name(thread_name_) { logger = &Logger::get(log_name); LOG_INFO(logger, "Create " << log_name << " with " << size << " threads"); + thread_sleep_seconds = config.getDouble("background_processing_pool_thread_sleep_seconds", 10); + thread_sleep_seconds_random_part = config.getDouble("background_processing_pool_thread_sleep_seconds_random_part", 1.0); + thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1); + task_sleep_seconds_when_no_work_min = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10); + task_sleep_seconds_when_no_work_max = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600); + task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1); + task_sleep_seconds_when_no_work_random_part = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0); + threads.resize(size); for (auto & thread : threads) thread = ThreadFromGlobalPool([this] { threadFunction(); }); diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 774db582a3e..9929e380f25 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -47,6 +48,7 @@ public: BackgroundProcessingPool(int size_, + const Poco::Util::AbstractConfiguration & config, const char * log_name = "BackgroundProcessingPool", const char * thread_name_ = "BackgrProcPool"); @@ -84,6 +86,17 @@ protected: ThreadGroupStatusPtr thread_group; void threadFunction(); + +private: + double thread_sleep_seconds; + double thread_sleep_seconds_random_part; + double thread_sleep_seconds_if_nothing_to_do; + + /// For exponential backoff. + double task_sleep_seconds_when_no_work_min; + double task_sleep_seconds_when_no_work_max; + double task_sleep_seconds_when_no_work_multiplier; + double task_sleep_seconds_when_no_work_random_part; }; From 07c4fa3cc0774093ad9dc5f7cea67bd26af8548d Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 30 Nov 2019 22:35:37 +0300 Subject: [PATCH 15/51] Correct merge of background move pool. 
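
The move pool is constructed with the server configuration as well, so the
BackgroundProcessingPool tunables introduced in the previous commit apply to
it too. For example, the idle backoff of both pools could be shortened with a
server config like this (an illustrative snippet, not part of this patch; the
keys and their defaults are the ones read in BackgroundProcessingPool.cpp):

    <yandex>
        <background_processing_pool_task_sleep_seconds_when_no_work_min>1</background_processing_pool_task_sleep_seconds_when_no_work_min>
        <background_processing_pool_task_sleep_seconds_when_no_work_max>5</background_processing_pool_task_sleep_seconds_when_no_work_max>
    </yandex>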
--- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 6437707ebeb..e8ba306aecb 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1508,7 +1508,7 @@ BackgroundProcessingPool & Context::getBackgroundMovePool() { auto lock = getLock(); if (!shared->background_move_pool) - shared->background_move_pool.emplace(settings.background_move_pool_size, "BackgroundMovePool", "BgMoveProcPool"); + shared->background_move_pool.emplace(settings.background_move_pool_size, getConfigRef(), "BackgroundMovePool", "BgMoveProcPool"); return *shared->background_move_pool; } From 88c722025672bd91e79f9c19446ca605603e98b8 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 Nov 2019 13:01:04 +0300 Subject: [PATCH 16/51] Fixed Clang build. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6ea0d202883..ea31fe2135d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -580,7 +580,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new String result_column = ttl_ast->getColumnName(); checkTTLExpression(expr, result_column); - return {expr, result_column}; + return {expr, result_column, TTLDestinationType::DELETE, {}}; }; if (!new_column_ttls.empty()) @@ -635,8 +635,9 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); if (!only_check) { - TTLEntry entry{new_ttl_entry.expression, new_ttl_entry.result_column, ttl_element.destination_type, ttl_element.destination_name}; - move_ttl_entries_by_name.emplace(new_ttl_entry.result_column, entry); + new_ttl_entry.destination_type = ttl_element.destination_type; + new_ttl_entry.destination_name = ttl_element.destination_name; + move_ttl_entries_by_name.emplace(new_ttl_entry.result_column, new_ttl_entry); } } } @@ -3129,11 +3130,13 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const namespace { -inline DiskSpace::ReservationPtr throwNotEnoughSpace(UInt64 expected_size) +inline DiskSpace::ReservationPtr returnReservationOrThrowError(UInt64 expected_size, DiskSpace::ReservationPtr reservation) { + if (reservation) + return reservation; + throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(expected_size) + ", not enough space", ErrorCodes::NOT_ENOUGH_SPACE); - return {}; } } @@ -3143,10 +3146,8 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) cons expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); auto reservation = storage_policy->reserve(expected_size); - if (reservation) - return reservation; - return throwNotEnoughSpace(expected_size); + return returnReservationOrThrowError(expected_size, std::move(reservation)); } DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size, @@ -3160,10 +3161,8 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(U return reservation; reservation = storage_policy->reserve(expected_size); - if (reservation) - return reservation; - return throwNotEnoughSpace(expected_size); + return returnReservationOrThrowError(expected_size, 
std::move(reservation)); } DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceOnMoveDestination(UInt64 expected_size, @@ -3216,10 +3215,8 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpaceOnSpecificDisk(UInt64 expec expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); auto reservation = disk->reserve(expected_size); - if (reservation) - return reservation; - return throwNotEnoughSpace(expected_size); + return returnReservationOrThrowError(expected_size, std::move(reservation)); } MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const From ce066d3bb0e35401da23ce9a9b842d169434866e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 Nov 2019 15:10:48 +0300 Subject: [PATCH 17/51] Style fixes. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index ea31fe2135d..7bd9668b4d0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3159,7 +3159,7 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(U auto reservation = tryReserveSpaceOnMoveDestination(expected_size, ttl_infos, minimum_time); if (reservation) return reservation; - + reservation = storage_policy->reserve(expected_size); return returnReservationOrThrowError(expected_size, std::move(reservation)); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 8bc1781c695..5a4fd5ad403 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include From cc0c21604ff43e52b90b86c9978e6e47db69c3ce Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 12 Nov 2019 15:37:30 +0300 Subject: [PATCH 18/51] Fixed Clang build one more time. --- dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index a0b9bb5cf6d..cc8319e48e1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -36,7 +36,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) if (json.has("columns")) { const JSON & columns = json["columns"]; - for (const auto & col : columns) + for (auto col : columns) { MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = col["min"].getUInt(); @@ -58,7 +58,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) if (json.has("moves")) { const JSON & moves = json["moves"]; - for (const auto & move : moves) + for (auto move : moves) { MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = move["min"].getUInt(); From 62016feab0222c8871c039ae2fbed64af670a56e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 20 Nov 2019 11:06:51 +0300 Subject: [PATCH 19/51] Renamed TTLDestinationType to PartDestinationType. 
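
The enum is not specific to TTL: it also describes the destination of
ALTER TABLE ... MOVE PART|PARTITION TO DISK|VOLUME (see ASTAlterQuery and
PartitionCommands below), so the more general name fits better.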
--- dbms/src/Parsers/ASTAlterQuery.cpp | 4 ++-- dbms/src/Parsers/ASTAlterQuery.h | 2 +- dbms/src/Parsers/ASTTTLElement.cpp | 6 +++--- dbms/src/Parsers/ASTTTLElement.h | 6 +++--- dbms/src/Parsers/ExpressionElementParsers.cpp | 8 ++++---- dbms/src/Parsers/ParserAlterQuery.cpp | 8 ++++---- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 ++-- .../{TTLDestinationType.h => PartDestinationType.h} | 2 +- dbms/src/Storages/PartitionCommands.cpp | 6 +++--- 10 files changed, 27 insertions(+), 27 deletions(-) rename dbms/src/Storages/MergeTree/{TTLDestinationType.h => PartDestinationType.h} (70%) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index ce2900942be..f4575623c7e 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -176,10 +176,10 @@ void ASTAlterCommand::formatImpl( settings.ostr << " TO "; switch (move_destination_type) { - case TTLDestinationType::DISK: + case PartDestinationType::DISK: settings.ostr << "DISK "; break; - case TTLDestinationType::VOLUME: + case PartDestinationType::VOLUME: settings.ostr << "VOLUME "; break; default: diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index f3f06fdea98..02e76d5555c 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -129,7 +129,7 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN - TTLDestinationType move_destination_type; /// option for MOVE PART/PARTITION + PartDestinationType move_destination_type; /// option for MOVE PART/PARTITION String move_destination_name; /// option for MOVE PART/PARTITION diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp index c746b957b0f..5fe401e828a 100644 --- a/dbms/src/Parsers/ASTTTLElement.cpp +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -10,15 +10,15 @@ namespace DB void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { children.front()->formatImpl(settings, state, frame); - if (destination_type == TTLDestinationType::DISK) + if (destination_type == PartDestinationType::DISK) { settings.ostr << " TO DISK " << quoteString(destination_name); } - else if (destination_type == TTLDestinationType::VOLUME) + else if (destination_type == PartDestinationType::VOLUME) { settings.ostr << " TO VOLUME " << quoteString(destination_name); } - else if (destination_type == TTLDestinationType::DELETE) + else if (destination_type == PartDestinationType::DELETE) { /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. 
} diff --git a/dbms/src/Parsers/ASTTTLElement.h b/dbms/src/Parsers/ASTTTLElement.h index 969740e2da3..02f70094e04 100644 --- a/dbms/src/Parsers/ASTTTLElement.h +++ b/dbms/src/Parsers/ASTTTLElement.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB @@ -11,10 +11,10 @@ namespace DB class ASTTTLElement : public IAST { public: - TTLDestinationType destination_type; + PartDestinationType destination_type; String destination_name; - ASTTTLElement(TTLDestinationType destination_type_, const String & destination_name_) + ASTTTLElement(PartDestinationType destination_type_, const String & destination_name_) : destination_type(destination_type_) , destination_name(destination_name_) { diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 55e3ef42d28..eb77d77a5c8 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1427,16 +1427,16 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_exp.parse(pos, expr_elem, expected)) return false; - TTLDestinationType destination_type = TTLDestinationType::DELETE; + PartDestinationType destination_type = PartDestinationType::DELETE; String destination_name; if (s_to_disk.ignore(pos)) - destination_type = TTLDestinationType::DISK; + destination_type = PartDestinationType::DISK; else if (s_to_volume.ignore(pos)) - destination_type = TTLDestinationType::VOLUME; + destination_type = PartDestinationType::VOLUME; else s_delete.ignore(pos); - if (destination_type == TTLDestinationType::DISK || destination_type == TTLDestinationType::VOLUME) + if (destination_type == PartDestinationType::DISK || destination_type == PartDestinationType::VOLUME) { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 2eaba9e1e5f..6fb0865d652 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -237,9 +237,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; if (s_to_disk.ignore(pos)) - command->move_destination_type = TTLDestinationType::DISK; + command->move_destination_type = PartDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = TTLDestinationType::VOLUME; + command->move_destination_type = PartDestinationType::VOLUME; else return false; @@ -257,9 +257,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MOVE_PARTITION; if (s_to_disk.ignore(pos)) - command->move_destination_type = TTLDestinationType::DISK; + command->move_destination_type = PartDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = TTLDestinationType::VOLUME; + command->move_destination_type = PartDestinationType::VOLUME; else return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 7bd9668b4d0..aa03acdb751 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -580,7 +580,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new String result_column = ttl_ast->getColumnName(); checkTTLExpression(expr, result_column); - return {expr, result_column, TTLDestinationType::DELETE, {}}; + return {expr, result_column, 
PartDestinationType::DELETE, {}}; }; if (!new_column_ttls.empty()) @@ -614,7 +614,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new for (auto ttl_element_ptr : new_ttl_table_ast->children) { ASTTTLElement & ttl_element = static_cast(*ttl_element_ptr); - if (ttl_element.destination_type == TTLDestinationType::DELETE) + if (ttl_element.destination_type == PartDestinationType::DELETE) { if (seen_delete_ttl) { @@ -3175,7 +3175,7 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceOnMoveDestination(UInt64 if (ttl_entry != nullptr) { DiskSpace::ReservationPtr reservation; - if (ttl_entry->destination_type == TTLDestinationType::VOLUME) + if (ttl_entry->destination_type == PartDestinationType::VOLUME) { auto volume_ptr = storage_policy->getVolumeByName(ttl_entry->destination_name); if (volume_ptr) @@ -3189,7 +3189,7 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceOnMoveDestination(UInt64 << log_name << "' but volume was not found"); } } - else if (ttl_entry->destination_type == TTLDestinationType::DISK) + else if (ttl_entry->destination_type == PartDestinationType::DISK) { auto disk_ptr = storage_policy->getDiskByName(ttl_entry->destination_name); if (disk_ptr) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 5a4fd5ad403..2d009bb76b2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -729,7 +729,7 @@ public: String result_column; /// Name and type of a destination are only valid in table-level context. - TTLDestinationType destination_type; + PartDestinationType destination_type; String destination_name; }; diff --git a/dbms/src/Storages/MergeTree/TTLDestinationType.h b/dbms/src/Storages/MergeTree/PartDestinationType.h similarity index 70% rename from dbms/src/Storages/MergeTree/TTLDestinationType.h rename to dbms/src/Storages/MergeTree/PartDestinationType.h index 5825268d7a4..0d63051acca 100644 --- a/dbms/src/Storages/MergeTree/TTLDestinationType.h +++ b/dbms/src/Storages/MergeTree/PartDestinationType.h @@ -4,7 +4,7 @@ namespace DB { -enum class TTLDestinationType +enum class PartDestinationType { DISK, VOLUME, diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index c52fc092741..885c6ddaec7 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -48,10 +48,10 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.part = command_ast->part; switch (command_ast->move_destination_type) { - case TTLDestinationType::DISK: + case PartDestinationType::DISK: res.move_destination_type = PartitionCommand::MoveDestinationType::DISK; break; - case TTLDestinationType::VOLUME: + case PartDestinationType::VOLUME: res.move_destination_type = PartitionCommand::MoveDestinationType::VOLUME; break; default: From cb777f6c5080724dd515222baab1e4777be09712 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 20 Nov 2019 11:08:04 +0300 Subject: [PATCH 20/51] Removed redundant template from `updateTTL()` method. 
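
Both table-level and move TTLs are stored as MergeTreeData::TTLEntry since the
earlier refactoring, so updateTTL() no longer needs to be generic over the
entry type.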
---
 dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index bf1425422f3..a7604b106f7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -75,8 +75,10 @@ void buildScatterSelector(
 }
 
 /// Computes ttls and updates ttl infos
-template <typename TTLEntry>
-void updateTTL(const TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, Block & block)
+void updateTTL(const MergeTreeData::TTLEntry & ttl_entry,
+    MergeTreeDataPart::TTLInfos & ttl_infos,
+    DB::MergeTreeDataPartTTLInfo & ttl_info,
+    Block & block)
 {
     if (!block.has(ttl_entry.result_column))
         ttl_entry.expression->execute(block);

From 617bf4214fe0c1384d103f4c26d9874b422c4eb4 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Mon, 25 Nov 2019 10:46:58 +0300
Subject: [PATCH 21/51] Added first version of tests for extended TTL
 expressions (still need refactoring).

---
 dbms/tests/integration/test_ttl_move/__init__.py                         |   0
 dbms/tests/integration/test_ttl_move/configs/config.d/cluster.xml        |  16 +
 dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml  |   4 +
 dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml | 64 +++
 dbms/tests/integration/test_ttl_move/configs/logs_config.xml             |  17 +
 dbms/tests/integration/test_ttl_move/test.py                             | 530 ++++++++++++++++++
 6 files changed, 631 insertions(+)
 create mode 100644 dbms/tests/integration/test_ttl_move/__init__.py
 create mode 100644 dbms/tests/integration/test_ttl_move/configs/config.d/cluster.xml
 create mode 100644 dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml
 create mode 100644 dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
 create mode 100644 dbms/tests/integration/test_ttl_move/configs/logs_config.xml
 create mode 100644 dbms/tests/integration/test_ttl_move/test.py

diff --git a/dbms/tests/integration/test_ttl_move/__init__.py b/dbms/tests/integration/test_ttl_move/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_ttl_move/configs/config.d/cluster.xml b/dbms/tests/integration/test_ttl_move/configs/config.d/cluster.xml
new file mode 100644
index 00000000000..ec7c9b8e4f8
--- /dev/null
+++ b/dbms/tests/integration/test_ttl_move/configs/config.d/cluster.xml
@@ -0,0 +1,16 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml b/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml
new file mode 100644
index 00000000000..ac5005061e9
--- /dev/null
+++ b/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml
@@ -0,0 +1,4 @@
+<yandex>
+    <background_processing_pool_task_sleep_seconds_when_no_work_min>0.5</background_processing_pool_task_sleep_seconds_when_no_work_min>
+    <background_processing_pool_task_sleep_seconds_when_no_work_max>0.5</background_processing_pool_task_sleep_seconds_when_no_work_max>
+</yandex>
diff --git a/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml b/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
new file mode 100644
index 00000000000..454b78ec216
--- /dev/null
+++ b/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
@@ -0,0 +1,64 @@
+<yandex>
+
+    <storage_configuration>
+
+        <disks>
+            <jbod1>
+                <path>/jbod1/</path>
+            </jbod1>
+            <jbod2>
+                <path>/jbod2/</path>
+            </jbod2>
+            <external>
+                <path>/external/</path>
+            </external>
+        </disks>
+
+        <policies>
+
+            <external_with_jbods>
+                <volumes>
+                    <main>
+                        <disk>external</disk>
+                    </main>
+                    <jbods>
+                        <disk>jbod1</disk>
+                        <disk>jbod2</disk>
+                    </jbods>
+                </volumes>
+            </external_with_jbods>
+
+            <small_jbod_with_external>
+                <volumes>
+                    <main>
+                        <disk>jbod1</disk>
+                    </main>
+                    <external>
+                        <disk>external</disk>
+                    </external>
+                </volumes>
+            </small_jbod_with_external>
+
+            <jbod1_with_jbod2>
+                <volumes>
+                    <main>
+                        <disk>jbod1</disk>
+                    </main>
+                    <external>
+                        <disk>jbod2</disk>
+                    </external>
+                </volumes>
+            </jbod1_with_jbod2>
+
+            <only_jbod2>
+                <volumes>
+                    <main>
+                        <disk>jbod2</disk>
+                    </main>
+                </volumes>
+            </only_jbod2>
+        </policies>
+
+    </storage_configuration>
+
+</yandex>
diff --git a/dbms/tests/integration/test_ttl_move/configs/logs_config.xml b/dbms/tests/integration/test_ttl_move/configs/logs_config.xml
new file mode 100644
index 00000000000..bdf1bbc11c1
--- /dev/null
+++ b/dbms/tests/integration/test_ttl_move/configs/logs_config.xml
@@ -0,0 +1,17 @@
+<yandex>
+    <shutdown_wait_unfinished>3</shutdown_wait_unfinished>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/log.log</log>
+        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
+        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
+    </logger>
+    <part_log>
+        <database>system</database>
+        <table>part_log</table>
+        <flush_interval_milliseconds>500</flush_interval_milliseconds>
+    </part_log>
+</yandex>
diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py
new file mode 100644
index 00000000000..c4c9b498483
--- /dev/null
+++ b/dbms/tests/integration/test_ttl_move/test.py
@@ -0,0 +1,530 @@
+import json
+import pytest
+import random
+import re
+import string
+import time
+from multiprocessing.dummy import Pool
+from helpers.client import QueryRuntimeException
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import TSV
+
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1',
+                             config_dir='configs',
+                             main_configs=['configs/logs_config.xml'],
+                             with_zookeeper=True,
+                             tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'],
+                             macros={"shard": 0, "replica": 1} )
+
+node2 = cluster.add_instance('node2',
+                             config_dir='configs',
+                             main_configs=['configs/logs_config.xml'],
+                             with_zookeeper=True,
+                             tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'],
+                             macros={"shard": 0, "replica": 2} )
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def get_random_string(length):
+    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))
+
+
+def get_used_disks_for_table(node, table_name):
+    return node.query("select disk_name from system.parts where table == '{}' and active=1 order by modification_time".format(table_name)).strip().split('\n')
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_inserts_to_disk_work","MergeTree()"),
+    ("replicated_mt_test_inserts_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_work', '1')",),
+])
+def test_inserts_to_disk_work(started_cluster, name, engine):
+    try:
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String,
+                d1 DateTime DEFAULT now()
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external'
+            SETTINGS storage_policy='small_jbod_with_external'
+        """.format(name=name, engine=engine, time=time.time()-2))
+
+        data = []  # 10MB in total
+        for i in range(10):
+            data.append(get_random_string(1024 * 1024))  # 1MB row
+
+        node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data])))
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'external'}
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "10"
+
+    finally:
+        node1.query("DROP TABLE IF EXISTS {}".format(name))
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_inserts_to_disk_do_not_work","MergeTree()"),
+    ("replicated_mt_test_inserts_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_do_not_work', '1')",),
+])
+def test_inserts_to_disk_do_not_work(started_cluster, name, engine):
+    try:
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String,
+                d1 DateTime
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            TTL d1 TO DISK 'external'
+            SETTINGS storage_policy='small_jbod_with_external'
+        """.format(name=name, engine=engine))
+
+        data = []  # 10MB in total
+        for i in range(10):
+            data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2 if i > 0 else time.time()+2)))  # 1MB row
+
+        node1.query("INSERT INTO {} VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data])))
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod1'}
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "10"
{}".format(name=name)).strip() == 10 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_to_disk_work","MergeTree()"), + ("replicated_mt_test_moves_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_work', '1')",), +]) +def test_moves_to_disk_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime DEFAULT now() + ) ENGINE = {engine} + ORDER BY tuple() + TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name, engine=engine, time=time.time()+2)) + + data = [] # 10MB in total + for i in range(10): + data.append(get_random_string(1024 * 1024)) # 1MB row + + node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + + time.sleep(4) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'external'} + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_to_disk_do_not_work","MergeTree()"), + ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",), +]) +def test_moves_to_disk_do_not_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + TTL d1 TO DISK 'external' + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name, engine=engine, time=time.time()+2)) + + data = [] # 10MB in total + for i in range(10): + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row + + node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + + time.sleep(4) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_to_volume_work","MergeTree()"), + ("replicated_mt_test_moves_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_volume_work', '1')",), +]) +def test_moves_to_volume_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + p1 Int64, + s1 String, + d1 DateTime DEFAULT now() + ) ENGINE = {engine} + ORDER BY tuple() + PARTITION BY p1 + TTL toDateTime({time}) + toInt64(d1)*0 TO VOLUME 'main' + SETTINGS storage_policy='external_with_jbods' + """.format(name=name, engine=engine, time=time.time()+2)) + + for _ in range(2): + data = [] # 10MB in total + for i in range(10): + data.append((p, "'{}'".format(get_random_string(1024 * 1024)))) # 1MB row + + node1.query("INSERT INTO {} (p1, s1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1', 'jbod2'} + + time.sleep(4) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == 
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "20"
+
+    finally:
+        node1.query("DROP TABLE IF EXISTS {}".format(name))
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_inserts_to_volume_work","MergeTree()"),
+    ("replicated_mt_test_inserts_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_work', '1')",),
+])
+def test_inserts_to_volume_work(started_cluster, name, engine):
+    try:
+        node1.query("""
+            CREATE TABLE {name} (
+                p1 Int64,
+                s1 String,
+                d1 DateTime DEFAULT now()
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            PARTITION BY p1
+            TTL toDateTime({time}) + toInt64(d1)*0 TO VOLUME 'main'
+            SETTINGS storage_policy='external_with_jbods'
+        """.format(name=name, engine=engine, time=time.time()-2))
+
+        for p in range(2):
+            data = []  # 10MB in total
+            for i in range(10):
+                data.append((str(p), "'{}'".format(get_random_string(1024 * 1024))))  # 1MB row
+
+            node1.query("INSERT INTO {} (p1, s1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data])))
+
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'external'}
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "20"
+
+    finally:
+        node1.query("DROP TABLE IF EXISTS {}".format(name))
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_moves_to_disk_eventually_work","MergeTree()"),
+    ("replicated_mt_test_moves_to_disk_eventually_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_eventually_work', '1')",),
+])
+def test_moves_to_disk_eventually_work(started_cluster, name, engine):
+    try:
+        name_temp = name + "_temp"
+
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            SETTINGS storage_policy='only_jbod2'
+        """.format(name=name_temp, engine=engine))
+
+        data = []  # 35MB in total
+        for i in range(35):
+            data.append(get_random_string(1024 * 1024))  # 1MB row
+
+        node1.query("INSERT INTO {} VALUES {}".format(name_temp, ','.join(["('" + x + "')" for x in data])))
+        used_disks = get_used_disks_for_table(node1, name_temp)
+        assert set(used_disks) == {'jbod2'}
+
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String,
+                d1 DateTime DEFAULT now()
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'jbod2'
+            SETTINGS storage_policy='jbod1_with_jbod2'
+        """.format(name=name, engine=engine, time=time.time()-2))
+
+        data = []  # 10MB in total
+        for i in range(10):
+            data.append(get_random_string(1024 * 1024))  # 1MB row
+
+        node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data])))
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod1'}
+
+        node1.query("DROP TABLE {}".format(name_temp))
+
+        time.sleep(2)
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod2'}
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "10"
+
+    finally:
+        node1.query("DROP TABLE IF EXISTS {}".format(name_temp))
+        node1.query("DROP TABLE IF EXISTS {}".format(name))
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_merges_to_disk_work","MergeTree()"),
+    ("replicated_mt_test_merges_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_work', '1')",),
+])
+def test_merges_to_disk_work(started_cluster, name, engine):
+    try:
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String,
+                d1 DateTime DEFAULT now()
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external'
+            SETTINGS storage_policy='small_jbod_with_external'
+        """.format(name=name, engine=engine, time=time.time()+2))
+
+        node1.query("SYSTEM STOP MERGES {}".format(name))
+        node1.query("SYSTEM STOP MOVES {}".format(name))
+
+        for _ in range(2):
+            data = []  # 16MB in total
+            for i in range(8):
+                data.append(get_random_string(1024 * 1024))  # 1MB row
+
+            node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data])))
+
+        time.sleep(4)
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod1'}
+        assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count()
+
+        node1.query("SYSTEM START MERGES {}".format(name))
+        node1.query("OPTIMIZE TABLE {}".format(name))
+
+        time.sleep(1)
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'external'}
+        assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count()
+
+        assert node1.query("SELECT count() FROM {}".format(name)).strip() == "16"
+
+    finally:
+        node1.query("DROP TABLE IF EXISTS {}".format(name))
+
+
+@pytest.mark.parametrize("name,engine", [
+    ("mt_test_merges_to_full_disk_work","MergeTree()"),
+    ("replicated_mt_test_merges_to_full_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_full_disk_work', '1')",),
+])
+def test_merges_to_full_disk_work(started_cluster, name, engine):
+    try:
+        name_temp = name + "_temp"
+
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            SETTINGS storage_policy='only_jbod2'
+        """.format(name=name_temp, engine=engine))
+
+        data = []  # 35MB in total
+        for i in range(35):
+            data.append(get_random_string(1024 * 1024))  # 1MB row
+
+        node1.query("INSERT INTO {} VALUES {}".format(name_temp, ','.join(["('" + x + "')" for x in data])))
+        used_disks = get_used_disks_for_table(node1, name_temp)
+        assert set(used_disks) == {'jbod2'}
+
+        node1.query("""
+            CREATE TABLE {name} (
+                s1 String,
+                d1 DateTime DEFAULT now()
+            ) ENGINE = {engine}
+            ORDER BY tuple()
+            TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external'
+            SETTINGS storage_policy='small_jbod_with_external'
+        """.format(name=name, engine=engine, time=time.time()+2))
+
+        node1.query("SYSTEM STOP MOVES {}".format(name))
+
+        for _ in range(2):
+            data = []  # 16MB in total
+            for i in range(8):
+                data.append(get_random_string(1024 * 1024))  # 1MB row
+
+            node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data])))
+
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod1'}
+        assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count()
+
+        time.sleep(4)
+        node1.query("OPTIMIZE TABLE {}".format(name))
+
+        used_disks = get_used_disks_for_table(node1, name)
+        assert set(used_disks) == {'jbod1'}  # Merged to the same disk against the rule.
+ assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) + node1.query("DROP TABLE IF EXISTS {}".format(name)) + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_after_merges_work","MergeTree()"), + ("replicated_mt_test_moves_after_merges_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_work', '1')",), +]) +def test_moves_after_merges_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime DEFAULT now() + ) ENGINE = {engine} + ORDER BY tuple() + TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name, engine=engine, time=time.time()+2)) + + for _ in range(2): + data = [] # 16MB in total + for i in range(8): + data.append(get_random_string(1024 * 1024)) # 1MB row + + node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + + node1.query("OPTIMIZE TABLE {}".format(name)) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + + time.sleep(4) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'external'} + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_merges_to_disk_do_not_work","MergeTree()"), + ("replicated_mt_test_merges_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_do_not_work', '1')",), +]) +def test_merges_to_disk_do_not_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + TTL d1 TO DISK 'external' + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name, engine=engine)) + + node1.query("SYSTEM STOP MERGES {}".format(name)) + node1.query("SYSTEM STOP MOVES {}".format(name)) + + for _ in range(2): + data = [] # 16MB in total + for i in range(8): + data.append(("'{}'".format(get_random_string(1024 * 1024)), 'toDateTime({})'.format(time.time()+2 if i > 0 else time.time()+7))) # 1MB row + + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + + time.sleep(4) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + + node1.query("SYSTEM START MERGES {}".format(name)) + node1.query("OPTIMIZE TABLE {}".format(name)) + + time.sleep(1) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_after_merges_do_not_work","MergeTree()"), + 
("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",), +]) +def test_moves_after_merges_do_not_work(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + TTL d1 TO DISK 'external' + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name, engine=engine, time=time.time()+2)) + + for _ in range(2): + data = [] # 16MB in total + for i in range(8): + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row + + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + + node1.query("OPTIMIZE TABLE {}".format(name)) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + + time.sleep(4) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {'jbod1'} + + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + +# FIXME refactor _do_not tests into main ones From cc70f717ed7101b2166844aec08d90f67800e692 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 26 Nov 2019 10:48:25 +0300 Subject: [PATCH 22/51] Fixed typos in `test_ttl_move` test. --- dbms/tests/integration/test_ttl_move/test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index c4c9b498483..946b07b67c3 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -329,7 +329,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): time.sleep(4) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() node1.query("SYSTEM START MERGES {}".format(name)) node1.query("OPTIMIZE TABLE {}".format(name)) @@ -337,7 +337,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): time.sleep(1) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'external'} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -390,14 +390,14 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() time.sleep(4) node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} # Merged to the same disk against the 
rule. - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -431,7 +431,7 @@ def test_moves_after_merges_work(started_cluster, name, engine): node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() time.sleep(4) @@ -473,7 +473,7 @@ def test_merges_to_disk_do_not_work(started_cluster, name, engine): time.sleep(4) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() node1.query("SYSTEM START MERGES {}".format(name)) node1.query("OPTIMIZE TABLE {}".format(name)) @@ -481,7 +481,7 @@ def test_merges_to_disk_do_not_work(started_cluster, name, engine): time.sleep(1) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -515,7 +515,7 @@ def test_moves_after_merges_do_not_work(started_cluster, name, engine): node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1'} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).count() + assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() time.sleep(4) From e163ebb7f090f617ede639d71e6571e9d0c3d534 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 26 Nov 2019 10:51:59 +0300 Subject: [PATCH 23/51] Fixed more typos in `test_ttl_move` test. 
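The totals in these comments did not account for the outer loop: each of the two insert rounds builds a fresh `data` list, so the table ends up holding rounds times rows of data. A quick arithmetic check of the corrected figures, assuming the tests' own 1MB-per-row convention (illustrative Python, not part of the test code):

    mb_per_row = 1
    print(2 * 10 * mb_per_row)  # 20MB in total for the 10-row loops
    print(2 * 8 * mb_per_row)   # 16MB in total for the 8-row loops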
--- dbms/tests/integration/test_ttl_move/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 946b07b67c3..1ff42eef331 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -192,7 +192,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): """.format(name=name, engine=engine, time=time.time()+2)) for _ in range(2): - data = [] # 10MB in total + data = [] # 20MB in total for i in range(10): data.append((p, "'{}'".format(get_random_string(1024 * 1024)))) # 1MB row @@ -230,7 +230,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine): """.format(name=name, engine=engine, time=time.time()-2)) for _ in range(2): - data = [] # 10MB in total + data = [] # 20MB in total for i in range(10): data.append((p, "'{}'".format(get_random_string(1024 * 1024)))) # 1MB row @@ -382,7 +382,7 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): node1.query("SYSTEM STOP MOVES {}".format(name)) for _ in range(2): - data = [] # 10MB in total + data = [] # 16MB in total for i in range(8): data.append(get_random_string(1024 * 1024)) # 1MB row From ce851fc782fcfb70ca3dd78846807c88395db8d3 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 26 Nov 2019 11:02:48 +0300 Subject: [PATCH 24/51] Fixed `MergeTreeData::hasTableTTL()` method according to extended TTLs. --- dbms/src/DataStreams/TTLBlockInputStream.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- dbms/src/Storages/MergeTree/MergeTreeData.h | 6 +++--- .../Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp | 9 ++++++--- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 10 +++++----- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 884318b4b06..350b008505e 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -147,7 +147,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) defaults_expression->execute(block_with_defaults); } - for (const auto & [name, ttl_entry] : storage.ttl_entries_by_name) + for (const auto & [name, ttl_entry] : storage.column_ttl_entries_by_name) { const auto & old_ttl_info = old_ttl_infos.columns_ttl[name]; auto & new_ttl_info = new_ttl_infos.columns_ttl[name]; @@ -194,7 +194,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) column_with_type.column = std::move(result_column); } - for (const auto & elem : storage.ttl_entries_by_name) + for (const auto & elem : storage.column_ttl_entries_by_name) if (block.has(elem.second.result_column)) block.erase(elem.second.result_column); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index aa03acdb751..cdbd17b6e90 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -598,12 +598,12 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new for (const auto & [name, ast] : new_column_ttls) { if (columns_ttl_forbidden.count(name)) - throw Exception("Trying to set ttl for key column " + name, ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); else { auto new_ttl_entry = create_ttl_entry(ast); if (!only_check) - 
ttl_entries_by_name.emplace(name, new_ttl_entry);
+                    column_ttl_entries_by_name.emplace(name, new_ttl_entry);
             }
         }
     }
@@ -3729,9 +3729,9 @@ const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination(
     /// Prefer TTL rule which went into action last.
     time_t max_min_ttl = 0;
 
-    for (const auto & [expression, ttl_entry] : move_ttl_entries_by_name)
+    for (const auto & [name, ttl_entry] : move_ttl_entries_by_name)
     {
-        auto ttl_info_it = ttl_infos.moves_ttl.find(expression);
+        auto ttl_info_it = ttl_infos.moves_ttl.find(name);
         if (ttl_info_it != ttl_infos.moves_ttl.end()
             && ttl_info_it->second.min >= minimum_time
             && max_min_ttl <= ttl_info_it->second.min)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index 2d009bb76b2..03836639343 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -587,8 +587,8 @@ public:
     bool hasSortingKey() const { return !sorting_key_columns.empty(); }
     bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
     bool hasSkipIndices() const { return !skip_indices.empty(); }
-    bool hasTableTTL() const { return ttl_table_ast != nullptr; }
-    bool hasAnyColumnTTL() const { return !ttl_entries_by_name.empty(); }
+    bool hasTableTTL() const { return ttl_table_entry.expression != nullptr; }
+    bool hasAnyColumnTTL() const { return !column_ttl_entries_by_name.empty(); }
 
     /// Check that the part is not broken and calculate the checksums for it if they are not present.
     MutableDataPartPtr loadPartAndFixMetadata(const DiskSpace::DiskPtr & disk, const String & relative_path);
@@ -734,7 +734,7 @@ public:
     };
 
     using TTLEntriesByName = std::unordered_map<String, TTLEntry>;
-    TTLEntriesByName ttl_entries_by_name;
+    TTLEntriesByName column_ttl_entries_by_name;
 
     TTLEntry ttl_table_entry;
     TTLEntriesByName move_ttl_entries_by_name;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index cc8319e48e1..8075f8f62d4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -16,13 +16,16 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i
         updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
     }
 
-    table_ttl.update(other_infos.table_ttl);
-    updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
-
     for (const auto & [expression, ttl_info] : other_infos.moves_ttl)
     {
         moves_ttl[expression].update(ttl_info);
+        updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
+        /// FIXME: Possibly one needs different logic here, because move TTL may spoil part min/max TTL;
+        /// in this case we also need to skip updating part min/max in the `updateTTL` method
     }
+
+    table_ttl.update(other_infos.table_ttl);
+    updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
 }
 
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index a7604b106f7..cb0ef57ac85 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -115,6 +115,8 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry,
         throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR);
 
     ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
+
+    /// FIXME why we don't erase new column from block?
} } @@ -217,10 +219,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa size_t expected_size = block.bytes(); DB::MergeTreeDataPart::TTLInfos move_ttl_infos; - for (const auto & [expression, ttl_entry] : data.move_ttl_entries_by_name) - { - updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[expression], block); - } + for (const auto & [name, ttl_entry] : data.move_ttl_entries_by_name) + updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[name], block); DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr)); @@ -275,7 +275,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa if (data.hasTableTTL()) updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block); - for (const auto & [name, ttl_entry] : data.ttl_entries_by_name) + for (const auto & [name, ttl_entry] : data.column_ttl_entries_by_name) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block); new_data_part->ttl_infos.update(move_ttl_infos); From b3e261800d29fa56e88766a8934de0f033b72755 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 26 Nov 2019 11:52:55 +0300 Subject: [PATCH 25/51] Fixes of tests for extended TTL syntax. --- dbms/tests/integration/test_ttl_move/test.py | 140 +++++++++---------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 1ff42eef331..b4dbff6fa5c 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -54,20 +54,20 @@ def test_inserts_to_disk_work(started_cluster, name, engine): node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + TTL d1 TO DISK 'external' SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()-2)) + """.format(name=name, engine=engine)) data = [] # 10MB in total for i in range(10): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 @@ -95,9 +95,9 @@ def test_inserts_to_disk_do_not_work(started_cluster, name, engine): for i in range(10): data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2 if i > 0 else time.time()+2))) # 1MB row - node1.query("INSERT INTO {} VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + node1.query("INSERT INTO {} VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 @@ -114,24 +114,24 @@ def test_moves_to_disk_work(started_cluster, name, 
engine): node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + TTL d1 TO DISK 'external' SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) + """.format(name=name, engine=engine)) data = [] # 10MB in total for i in range(10): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 @@ -159,13 +159,13 @@ def test_moves_to_disk_do_not_work(started_cluster, name, engine): for i in range(10): data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ",".join(["('" + x + "')" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 @@ -183,27 +183,27 @@ def test_moves_to_volume_work(started_cluster, name, engine): CREATE TABLE {name} ( p1 Int64, s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() PARTITION BY p1 - TTL toDateTime({time}) + toInt64(d1)*0 TO VOLUME 'main' + TTL d1 TO VOLUME 'main' SETTINGS storage_policy='external_with_jbods' - """.format(name=name, engine=engine, time=time.time()+2)) + """.format(name=name, engine=engine)) - for _ in range(2): + for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((p, "'{}'".format(get_random_string(1024 * 1024)))) # 1MB row + data.append((p, "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - node1.query("INSERT INTO {} (p1, s1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1', 'jbod2'} time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 20 @@ -221,23 +221,23 @@ def test_inserts_to_volume_work(started_cluster, name, engine): CREATE TABLE {name} ( p1 Int64, s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() PARTITION BY p1 - TTL toDateTime({time}) + toInt64(d1)*0 TO VOLUME 'main' + TTL d1 TO VOLUME 'main' 
SETTINGS storage_policy='external_with_jbods' - """.format(name=name, engine=engine, time=time.time()-2)) + """.format(name=name, engine=engine)) - for _ in range(2): + for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((p, "'{}'".format(get_random_string(1024 * 1024)))) # 1MB row + data.append((p, "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row - node1.query("INSERT INTO {} (p1, s1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 20 @@ -265,33 +265,33 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): for i in range(35): data.append(get_random_string(1024 * 1024)) # 1MB row - node1.query("INSERT INTO {} VALUES {}".format(name_temp, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} VALUES {}".format(name_temp, ",".join(["('" + x + "')" for x in data]))) used_disks = get_used_disks_for_table(node1, name_temp) - assert set(used_disks) == {'jbod2'} + assert set(used_disks) == {"jbod2"} node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'jbod2' + TTL d1 TO DISK 'jbod2' SETTINGS storage_policy='jbod1_with_jbod2' - """.format(name=name, engine=engine, time=time.time()-2)) + """.format(name=name, engine=engine)) data = [] # 10MB in total for i in range(10): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} node1.query("DROP TABLE {}".format(name_temp)) time.sleep(2) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod2'} + assert set(used_disks) == {"jbod2"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 @@ -309,12 +309,12 @@ def test_merges_to_disk_work(started_cluster, name, engine): node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + TTL d1 TO DISK 'external' SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) + """.format(name=name, engine=engine)) node1.query("SYSTEM STOP MERGES {}".format(name)) node1.query("SYSTEM STOP MOVES {}".format(name)) @@ -322,13 +322,13 @@ def test_merges_to_disk_work(started_cluster, name, engine): for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} 
(s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() node1.query("SYSTEM START MERGES {}".format(name)) @@ -336,7 +336,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): time.sleep(1) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -365,38 +365,38 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): for i in range(35): data.append(get_random_string(1024 * 1024)) # 1MB row - node1.query("INSERT INTO {} VALUES {}".format(name_temp, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} VALUES {}".format(name_temp, ",".join(["('" + x + "')" for x in data]))) used_disks = get_used_disks_for_table(node1, name_temp) - assert set(used_disks) == {'jbod2'} + assert set(used_disks) == {"jbod2"} node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + TTL d1 TO DISK 'external' SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) + """.format(name=name, engine=engine)) node1.query("SYSTEM STOP MOVES {}".format(name)) for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() time.sleep(4) node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} # Merged to the same disk against the rule. + assert set(used_disks) == {"jbod1"} # Merged to the same disk against the rule. 
assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -414,29 +414,29 @@ def test_moves_after_merges_work(started_cluster, name, engine): node1.query(""" CREATE TABLE {name} ( s1 String, - d1 DateTime DEFAULT now() + d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL toDateTime({time}) + toInt64(d1)*0 TO DISK 'external' + TTL d1 TO DISK 'external' SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) + """.format(name=name, engine=engine)) for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(get_random_string(1024 * 1024)) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'external'} + assert set(used_disks) == {"external"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -468,11 +468,11 @@ def test_merges_to_disk_do_not_work(started_cluster, name, engine): for i in range(8): data.append(("'{}'".format(get_random_string(1024 * 1024)), 'toDateTime({})'.format(time.time()+2 if i > 0 else time.time()+7))) # 1MB row - node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() node1.query("SYSTEM START MERGES {}".format(name)) @@ -480,7 +480,7 @@ def test_merges_to_disk_do_not_work(started_cluster, name, engine): time.sleep(1) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 @@ -510,17 +510,17 @@ def test_moves_after_merges_do_not_work(started_cluster, name, engine): for i in range(8): data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row - node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ','.join(["(" + ",".join(x) + ")" for x in data]))) + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) node1.query("OPTIMIZE TABLE {}".format(name)) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {'jbod1'} + assert set(used_disks) == {"jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = 
'{}' AND active = 1".format(name)).strip()
 
         time.sleep(4)
 
         used_disks = get_used_disks_for_table(node1, name)
-        assert set(used_disks) == {'jbod1'}
+        assert set(used_disks) == {"jbod1"}
 
         assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16
 
From 0da7234d99ffe86cdfbd87746a44acdf8d4aa2a0 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Tue, 26 Nov 2019 11:53:54 +0300
Subject: [PATCH 26/51] Added a few FIXMEs to not forget.

---
 dbms/src/DataStreams/TTLBlockInputStream.cpp       | 14 ++++++++------
 .../src/Storages/MergeTree/MergeTreeDataWriter.cpp |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp
index 350b008505e..874495c1176 100644
--- a/dbms/src/DataStreams/TTLBlockInputStream.cpp
+++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp
@@ -194,9 +194,10 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         column_with_type.column = std::move(result_column);
     }
 
-    for (const auto & elem : storage.column_ttl_entries_by_name)
-        if (block.has(elem.second.result_column))
-            block.erase(elem.second.result_column);
+    for (const auto & [_, ttl_entry] : storage.column_ttl_entries_by_name)
+        if (block.has(ttl_entry.result_column))
+            block.erase(ttl_entry.result_column);
+    /// FIXME: what if table had legitimate column with this name?
 }
 
 void TTLBlockInputStream::updateMovesTTL(Block & block)
@@ -217,9 +218,10 @@ void TTLBlockInputStream::updateMovesTTL(Block & block)
         }
     }
 
-    for (const auto & elem : storage.move_ttl_entries_by_name)
-        if (block.has(elem.second.result_column))
-            block.erase(elem.second.result_column);
+    for (const auto & [_, ttl_entry] : storage.move_ttl_entries_by_name)
+        if (block.has(ttl_entry.result_column))
+            block.erase(ttl_entry.result_column);
+    /// FIXME: what if table had legitimate column with this name?
 }
 
 UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index cb0ef57ac85..44b2727eaf4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -116,7 +116,7 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry,
 
     ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
 
-    /// FIXME why we don't erase new column from block?
+    /// FIXME: why we don't erase new column from block?
 }
 
 
From 108e2b33ebfbda9aff7c6acbea130693d3752de8 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Wed, 27 Nov 2019 21:39:47 +0300
Subject: [PATCH 27/51] Fixed a bug with erasing columns with the same name.
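The cleanup at the end of `removeValuesWithExpiredColumnTTL` and `updateMovesTTL` used to erase every TTL result column it found in the block, which would also delete a column that legitimately existed before the TTL expressions ran (the FIXMEs from the previous commit). Now only the columns materialized by the expressions themselves are recorded and erased. A minimal Python sketch of the corrected bookkeeping, with a dict standing in for `Block` and purely illustrative names:

    block = {"ttl_result": "pre-existing user column"}
    columns_to_remove = []
    if "ttl_result" not in block:              # the TTL expression would be executed here
        block["ttl_result"] = "computed TTL value"
        columns_to_remove.append("ttl_result") # remember only what we added ourselves
    for column in columns_to_remove:           # cleanup never touches pre-existing columns
        del block[column]
    assert "ttl_result" in block               # the user's column survives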
---
 dbms/src/DataStreams/TTLBlockInputStream.cpp | 20 +++++++++++--------
 .../MergeTree/MergeTreeDataWriter.cpp        |  9 ++++++++-
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp
index 874495c1176..02b016668c9 100644
--- a/dbms/src/DataStreams/TTLBlockInputStream.cpp
+++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp
@@ -147,6 +147,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         defaults_expression->execute(block_with_defaults);
     }
 
+    std::vector<String> columns_to_remove;
     for (const auto & [name, ttl_entry] : storage.column_ttl_entries_by_name)
     {
         const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
@@ -161,7 +162,10 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
             continue;
 
         if (!block.has(ttl_entry.result_column))
+        {
+            columns_to_remove.push_back(ttl_entry.result_column);
             ttl_entry.expression->execute(block);
+        }
 
         ColumnPtr default_column = nullptr;
         if (block_with_defaults.has(name))
@@ -194,20 +198,22 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         column_with_type.column = std::move(result_column);
     }
 
-    for (const auto & [_, ttl_entry] : storage.column_ttl_entries_by_name)
-        if (block.has(ttl_entry.result_column))
-            block.erase(ttl_entry.result_column);
-    /// FIXME: what if table had legitimate column with this name?
+    for (const String & column : columns_to_remove)
+        block.erase(column);
 }
 
 void TTLBlockInputStream::updateMovesTTL(Block & block)
 {
+    std::vector<String> columns_to_remove;
     for (const auto & [name, ttl_entry] : storage.move_ttl_entries_by_name)
    {
         auto & new_ttl_info = new_ttl_infos.moves_ttl[name];
 
         if (!block.has(ttl_entry.result_column))
+        {
+            columns_to_remove.push_back(ttl_entry.result_column);
             ttl_entry.expression->execute(block);
+        }
 
         const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get();
 
@@ -218,10 +224,8 @@ void TTLBlockInputStream::updateMovesTTL(Block & block)
         }
     }
 
-    for (const auto & [_, ttl_entry] : storage.move_ttl_entries_by_name)
-        if (block.has(ttl_entry.result_column))
-            block.erase(ttl_entry.result_column);
-    /// FIXME: what if table had legitimate column with this name?
+    for (const String & column : columns_to_remove)
+        block.erase(column);
 }
 
 UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 44b2727eaf4..b78e5271526 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -80,8 +80,12 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry,
     DB::MergeTreeDataPartTTLInfo & ttl_info,
     Block & block)
 {
+    bool remove_column = false;
     if (!block.has(ttl_entry.result_column))
+    {
         ttl_entry.expression->execute(block);
+        remove_column = true;
+    }
 
     const auto & current = block.getByName(ttl_entry.result_column);
 
@@ -116,7 +120,10 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry,
 
     ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
 
-    /// FIXME: why we don't erase new column from block?
+    if (remove_column)
+    {
+        block.erase(ttl_entry.result_column);
+    }
 }
 
 
From b3427af0c1a7edd39ba07cb29f7e5bd2b894344c Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Wed, 27 Nov 2019 22:13:37 +0300
Subject: [PATCH 28/51] Fixed typos in a test.
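`node1.query()` returns the query result as raw text, so the stripped output of a `count()` query is a string, and a comparison against an integer can never hold. The expected values therefore have to be string literals. Roughly:

    result = "10\n"               # what a count() query comes back as over the wire
    print(result.strip() == 10)   # False: a str never equals an int
    print(result.strip() == "10") # True

(The unrelated `{}`/`name=name` placeholder bug on the same lines is fixed in a later commit.)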
--- dbms/tests/integration/test_ttl_move/test.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index b4dbff6fa5c..2312c0cdfad 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -69,7 +69,7 @@ def test_inserts_to_disk_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -99,7 +99,7 @@ def test_inserts_to_disk_do_not_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -133,7 +133,7 @@ def test_moves_to_disk_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -167,7 +167,7 @@ def test_moves_to_disk_do_not_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -205,7 +205,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 20 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "20" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -239,7 +239,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 20 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "20" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -293,7 +293,7 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod2"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 10 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) @@ -339,7 +339,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): assert set(used_disks) == {"external"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: 
node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -399,7 +399,7 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): assert set(used_disks) == {"jbod1"} # Merged to the same disk against the rule. assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) @@ -438,7 +438,7 @@ def test_moves_after_merges_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -483,7 +483,7 @@ def test_merges_to_disk_do_not_work(started_cluster, name, engine): assert set(used_disks) == {"jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -522,7 +522,7 @@ def test_moves_after_merges_do_not_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == 16 + assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) From 3250955b077947815020872714c4810f2e29a369 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 08:07:22 +0300 Subject: [PATCH 29/51] Refactored tests a little bit. 
--- dbms/tests/integration/test_ttl_move/test.py | 206 +++---------------- 1 file changed, 32 insertions(+), 174 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 2312c0cdfad..ae76fe296e9 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -45,11 +45,13 @@ def get_used_disks_for_table(node, table_name): return node.query("select disk_name from system.parts where table == '{}' and active=1 order by modification_time".format(table_name)).strip().split('\n') -@pytest.mark.parametrize("name,engine", [ - ("mt_test_inserts_to_disk_work","MergeTree()"), - ("replicated_mt_test_inserts_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_work', '1')",), +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_inserts_to_disk_do_not_work","MergeTree()",0), + ("replicated_mt_test_inserts_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_do_not_work', '1')",0), + ("mt_test_inserts_to_disk_work","MergeTree()",1), + ("replicated_mt_test_inserts_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_work', '1')",1), ]) -def test_inserts_to_disk_work(started_cluster, name, engine): +def test_inserts_to_disk_work(started_cluster, name, engine, positive): try: node1.query(""" CREATE TABLE {name} ( @@ -63,11 +65,11 @@ def test_inserts_to_disk_work(started_cluster, name, engine): data = [] # 10MB in total for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2 if i > 0 or positive else time.time()+2))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"external"} + assert set(used_disks) == {"external" if positive else "jbod1"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" @@ -75,11 +77,13 @@ def test_inserts_to_disk_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.parametrize("name,engine", [ - ("mt_test_inserts_to_disk_do_not_work","MergeTree()"), - ("replicated_mt_test_inserts_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_do_not_work', '1')",), +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_moves_to_disk_do_not_work","MergeTree()",0), + ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",0), + ("mt_test_moves_to_disk_work","MergeTree()",1), + ("replicated_mt_test_moves_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_work', '1')",1), ]) -def test_inserts_to_disk_do_not_work(started_cluster, name, engine): +def test_moves_to_disk_work(started_cluster, name, engine, positive): try: node1.query(""" CREATE TABLE {name} ( @@ -93,37 +97,7 @@ def test_inserts_to_disk_do_not_work(started_cluster, name, engine): data = [] # 10MB in total for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2 if i > 0 else time.time()+2))) # 1MB row - - node1.query("INSERT INTO {} VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) - used_disks = 
get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" - - finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) - - -@pytest.mark.parametrize("name,engine", [ - ("mt_test_moves_to_disk_work","MergeTree()"), - ("replicated_mt_test_moves_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_work', '1')",), -]) -def test_moves_to_disk_work(started_cluster, name, engine): - try: - node1.query(""" - CREATE TABLE {name} ( - s1 String, - d1 DateTime - ) ENGINE = {engine} - ORDER BY tuple() - TTL d1 TO DISK 'external' - SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine)) - - data = [] # 10MB in total - for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+6))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) @@ -131,41 +105,7 @@ def test_moves_to_disk_work(started_cluster, name, engine): time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"external"} - - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" - - finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) - - -@pytest.mark.parametrize("name,engine", [ - ("mt_test_moves_to_disk_do_not_work","MergeTree()"), - ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",), -]) -def test_moves_to_disk_do_not_work(started_cluster, name, engine): - try: - node1.query(""" - CREATE TABLE {name} ( - s1 String, - d1 DateTime - ) ENGINE = {engine} - ORDER BY tuple() - TTL d1 TO DISK 'external' - SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) - - data = [] # 10MB in total - for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row - - node1.query("INSERT INTO {} (s1) VALUES {}".format(name, ",".join(["('" + x + "')" for x in data]))) - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - - time.sleep(4) - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} + assert set(used_disks) == {"external" if positive else "jbod1"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" @@ -300,11 +240,13 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.parametrize("name,engine", [ - ("mt_test_merges_to_disk_work","MergeTree()"), - ("replicated_mt_test_merges_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_work', '1')",), +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_merges_to_disk_do_not_work","MergeTree()",0), + ("replicated_mt_test_merges_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_do_not_work', '1')",0), + ("mt_test_merges_to_disk_work","MergeTree()",1), + 
("replicated_mt_test_merges_to_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_work', '1')",1), ]) -def test_merges_to_disk_work(started_cluster, name, engine): +def test_merges_to_disk_work(started_cluster, name, engine, positive): try: node1.query(""" CREATE TABLE {name} ( @@ -322,7 +264,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+7))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) @@ -336,7 +278,7 @@ def test_merges_to_disk_work(started_cluster, name, engine): time.sleep(1) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"external"} + assert set(used_disks) == {"external" if positive else "jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" @@ -405,11 +347,13 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.parametrize("name,engine", [ - ("mt_test_moves_after_merges_work","MergeTree()"), - ("replicated_mt_test_moves_after_merges_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_work', '1')",), +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_moves_after_merges_do_not_work","MergeTree()",0), + ("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",0), + ("mt_test_moves_after_merges_work","MergeTree()",1), + ("replicated_mt_test_moves_after_merges_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_work', '1')",1), ]) -def test_moves_after_merges_work(started_cluster, name, engine): +def test_moves_after_merges_work(started_cluster, name, engine, positive): try: node1.query(""" CREATE TABLE {name} ( @@ -424,7 +368,7 @@ def test_moves_after_merges_work(started_cluster, name, engine): for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+6))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) @@ -436,95 +380,9 @@ def test_moves_after_merges_work(started_cluster, name, engine): time.sleep(4) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"external"} + assert set(used_disks) == {"external" if positive else "jbod1"} assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) - - -@pytest.mark.parametrize("name,engine", [ - ("mt_test_merges_to_disk_do_not_work","MergeTree()"), - ("replicated_mt_test_merges_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_do_not_work', '1')",), -]) -def 
test_merges_to_disk_do_not_work(started_cluster, name, engine): - try: - node1.query(""" - CREATE TABLE {name} ( - s1 String, - d1 DateTime - ) ENGINE = {engine} - ORDER BY tuple() - TTL d1 TO DISK 'external' - SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine)) - - node1.query("SYSTEM STOP MERGES {}".format(name)) - node1.query("SYSTEM STOP MOVES {}".format(name)) - - for _ in range(2): - data = [] # 16MB in total - for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), 'toDateTime({})'.format(time.time()+2 if i > 0 else time.time()+7))) # 1MB row - - node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) - - time.sleep(4) - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - - node1.query("SYSTEM START MERGES {}".format(name)) - node1.query("OPTIMIZE TABLE {}".format(name)) - - time.sleep(1) - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" - - finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) - - -@pytest.mark.parametrize("name,engine", [ - ("mt_test_moves_after_merges_do_not_work","MergeTree()"), - ("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",), -]) -def test_moves_after_merges_do_not_work(started_cluster, name, engine): - try: - node1.query(""" - CREATE TABLE {name} ( - s1 String, - d1 DateTime - ) ENGINE = {engine} - ORDER BY tuple() - TTL d1 TO DISK 'external' - SETTINGS storage_policy='small_jbod_with_external' - """.format(name=name, engine=engine, time=time.time()+2)) - - for _ in range(2): - data = [] # 16MB in total - for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 else time.time()+6))) # 1MB row - - node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) - - node1.query("OPTIMIZE TABLE {}".format(name)) - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - - time.sleep(4) - - used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"jbod1"} - - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" - - finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) - -# FIXME refactor _do_not tests into main ones From ae4b2b4ace707a42848c7ffe4e8f55079df6124d Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 08:32:26 +0300 Subject: [PATCH 30/51] Fixed some typos in `test_ttl_move` test. 
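A positional `{}` placeholder cannot be filled from the keyword argument `name=name`, so `str.format` raised `IndexError` before these asserts ever compared anything; the placeholder has to be the named `{name}`. In short (standard `str.format` behaviour):

    print("SELECT count() FROM {name}".format(name="mt_test"))  # works
    try:
        "SELECT count() FROM {}".format(name="mt_test")          # positional placeholder...
    except IndexError as err:
        print("IndexError:", err)                                # ...cannot be filled by keyword args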
--- dbms/tests/integration/test_ttl_move/test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index ae76fe296e9..0cb5942220a 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -71,7 +71,7 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external" if positive else "jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -107,7 +107,7 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external" if positive else "jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -145,7 +145,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "20" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "20" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -179,7 +179,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "20" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "20" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -233,7 +233,7 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod2"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "10" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) @@ -281,7 +281,7 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): assert set(used_disks) == {"external" if positive else "jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) @@ -341,7 +341,7 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): assert set(used_disks) == {"jbod1"} # Merged to the same disk against the rule. 
assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) @@ -382,7 +382,7 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external" if positive else "jbod1"} - assert node1.query("SELECT count() FROM {}".format(name=name)).strip() == "16" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "16" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) From 7407f7c39d6dbac9ba0ac98b84955cef52a9b848 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 09:44:26 +0300 Subject: [PATCH 31/51] Placed move TTL rules to a vector. --- dbms/src/DataStreams/TTLBlockInputStream.cpp | 4 ++-- dbms/src/Parsers/ASTTTLElement.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 02b016668c9..339f81321e4 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -205,9 +205,9 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) void TTLBlockInputStream::updateMovesTTL(Block & block) { std::vector columns_to_remove; - for (const auto & [name, ttl_entry] : storage.move_ttl_entries_by_name) + for (const auto & ttl_entry : storage.move_ttl_entries) { - auto & new_ttl_info = new_ttl_infos.moves_ttl[name]; + auto & new_ttl_info = new_ttl_infos.moves_ttl[ttl_entry.result_column]; if (!block.has(ttl_entry.result_column)) { diff --git a/dbms/src/Parsers/ASTTTLElement.cpp b/dbms/src/Parsers/ASTTTLElement.cpp index 5fe401e828a..7e03a73e36d 100644 --- a/dbms/src/Parsers/ASTTTLElement.cpp +++ b/dbms/src/Parsers/ASTTTLElement.cpp @@ -20,7 +20,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st } else if (destination_type == PartDestinationType::DELETE) { - /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. + /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index cdbd17b6e90..a28e80280ee 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -637,7 +637,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new { new_ttl_entry.destination_type = ttl_element.destination_type; new_ttl_entry.destination_name = ttl_element.destination_name; - move_ttl_entries_by_name.emplace(new_ttl_entry.result_column, new_ttl_entry); + move_ttl_entries.emplace_back(std::move(new_ttl_entry)); } } } @@ -3729,9 +3729,9 @@ const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( /// Prefer TTL rule which went into action last. 
time_t max_min_ttl = 0; - for (const auto & [name, ttl_entry] : move_ttl_entries_by_name) + for (const auto & ttl_entry : move_ttl_entries) { - auto ttl_info_it = ttl_infos.moves_ttl.find(name); + auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); if (ttl_info_it != ttl_infos.moves_ttl.end() && ttl_info_it->second.min >= minimum_time && max_min_ttl <= ttl_info_it->second.min) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 03836639343..377b9b55929 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -737,7 +737,7 @@ public: TTLEntriesByName column_ttl_entries_by_name; TTLEntry ttl_table_entry; - TTLEntriesByName move_ttl_entries_by_name; + std::vector move_ttl_entries; String sampling_expr_column_name; Names columns_required_for_sampling; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index b78e5271526..d83d95da564 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -226,8 +226,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa size_t expected_size = block.bytes(); DB::MergeTreeDataPart::TTLInfos move_ttl_infos; - for (const auto & [name, ttl_entry] : data.move_ttl_entries_by_name) - updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[name], block); + for (const auto & ttl_entry : data.move_ttl_entries) + updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block); DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr)); From 14cf5305428f6ebcc3462e1b981e88769af44473 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 13:13:53 +0300 Subject: [PATCH 32/51] Added move TTL rules to metadata of `ReplicatedMergeTree`. 
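
Replicas validate their local table definition against the common metadata
kept in ZooKeeper. Move TTL rules are now serialized there as a dedicated
"move ttl:" line, with entries joined by ", ", and read back under the same
prefix; a mismatch between ZooKeeper and the local definition raises
METADATA_MISMATCH unless the change comes through an ALTER. For illustration
only (the expressions here are made up), the stored line looks like:

    move ttl: d1 TO DISK 'external', d1 + toIntervalDay(7) TO VOLUME 'cold'
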
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 +-- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 ++- .../ReplicatedMergeTreeTableMetadata.cpp | 34 ++++++++++++++++++- .../ReplicatedMergeTreeTableMetadata.h | 6 +++- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a28e80280ee..60363ecf3a6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -130,7 +131,6 @@ MergeTreeData::MergeTreeData( , merging_params(merging_params_) , partition_by_ast(partition_by_ast_) , sample_by_ast(sample_by_ast_) - , ttl_table_ast(ttl_table_ast_) , require_part_metadata(require_part_metadata_) , database_name(database_) , table_name(table_) @@ -580,7 +580,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new String result_column = ttl_ast->getColumnName(); checkTTLExpression(expr, result_column); - return {expr, result_column, PartDestinationType::DELETE, {}}; + return {expr, result_column, PartDestinationType::DELETE, {}, ttl_ast}; }; if (!new_column_ttls.empty()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 377b9b55929..e6f1ac85fc3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -587,7 +587,7 @@ public: bool hasSortingKey() const { return !sorting_key_columns.empty(); } bool hasPrimaryKey() const { return !primary_key_columns.empty(); } bool hasSkipIndices() const { return !skip_indices.empty(); } - bool hasTableTTL() const { return ttl_table_entry.expression != nullptr; } + bool hasTableTTL() const { return ttl_table_ast != nullptr; } bool hasAnyColumnTTL() const { return !column_ttl_entries_by_name.empty(); } /// Check that the part is not broken and calculate the checksums for it if they are not present. @@ -731,6 +731,8 @@ public: /// Name and type of a destination are only valid in table-level context. 
PartDestinationType destination_type; String destination_name; + + ASTPtr entry_ast; }; using TTLEntriesByName = std::unordered_map; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 5ada5d50b23..edc031bc53b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -5,6 +5,7 @@ #include #include + namespace DB { @@ -47,6 +48,16 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); ttl_table = formattedAST(data.ttl_table_ast); + + std::ostringstream ttl_move_stream; + for (const auto & ttl_entry : data.move_ttl_entries) + { + if (ttl_move_stream.tellp() > 0) + ttl_move_stream << ", "; + ttl_move_stream << formattedAST(ttl_entry.entry_ast); + } + ttl_move = ttl_move_stream.str(); + skip_indices = data.getIndices().toString(); if (data.canUseAdaptiveGranularity()) index_granularity_bytes = data_settings->index_granularity_bytes; @@ -78,6 +89,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!ttl_table.empty()) out << "ttl: " << ttl_table << "\n"; + if (!ttl_move.empty()) + out << "move ttl: " << ttl_move << "\n"; + if (!skip_indices.empty()) out << "indices: " << skip_indices << "\n"; @@ -119,6 +133,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("ttl: ", in)) in >> ttl_table >> "\n"; + if (checkString("move ttl: ", in)) + in >> ttl_move >> "\n"; + if (checkString("indices: ", in)) in >> skip_indices >> "\n"; @@ -223,12 +240,27 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl } else throw Exception( - "Existing table metadata in ZooKeeper differs in ttl." + "Existing table metadata in ZooKeeper differs in TTL." " Stored in ZooKeeper: " + from_zk.ttl_table + ", local: " + ttl_table, ErrorCodes::METADATA_MISMATCH); } + if (ttl_move != from_zk.ttl_move) + { + if (allow_alter) + { + diff.ttl_move_changed = true; + diff.new_ttl_move = from_zk.ttl_move; + } + else + throw Exception( + "Existing table metadata in ZooKeeper differs in move TTL." 
+ " Stored in ZooKeeper: " + from_zk.ttl_move + + ", local: " + ttl_move, + ErrorCodes::METADATA_MISMATCH); + } + if (skip_indices != from_zk.skip_indices) { if (allow_alter) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index d8af3c2087a..23fc4f6a024 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -28,6 +28,7 @@ struct ReplicatedMergeTreeTableMetadata String skip_indices; String constraints; String ttl_table; + String ttl_move; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -53,9 +54,12 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; + bool ttl_move_changed = false; + String new_ttl_move; + bool empty() const { - return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed; + return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed && !ttl_move_changed; } }; From df002f20b15bc4d989eafcf45fd1aea93134111a Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 13:30:51 +0300 Subject: [PATCH 33/51] Fixed move TTL metadata. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 60363ecf3a6..71f81034419 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -580,7 +580,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new String result_column = ttl_ast->getColumnName(); checkTTLExpression(expr, result_column); - return {expr, result_column, PartDestinationType::DELETE, {}, ttl_ast}; + return {expr, result_column, PartDestinationType::DELETE, {}, {}}; }; if (!new_column_ttls.empty()) @@ -635,6 +635,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); if (!only_check) { + new_ttl_entry.entry_ast = ttl_element_ptr; new_ttl_entry.destination_type = ttl_element.destination_type; new_ttl_entry.destination_name = ttl_element.destination_name; move_ttl_entries.emplace_back(std::move(new_ttl_entry)); From 2e853e32112f7bdff7189aa8e659f8ae910c3b69 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 13:57:58 +0300 Subject: [PATCH 34/51] Fixed more typos in `test_ttl_move`. 
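
The helper tables that exist only to fill up jbod2 were created with the
parametrized {engine} string, so in the replicated test variants each helper
tried to register a second ReplicatedMergeTree replica under the same
ZooKeeper path as the table under test. Pinning the helpers to plain
MergeTree() keeps them out of ZooKeeper entirely.
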
--- dbms/tests/integration/test_ttl_move/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 0cb5942220a..ac4d3bf516e 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -196,10 +196,10 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): node1.query(""" CREATE TABLE {name} ( s1 String - ) ENGINE = {engine} + ) ENGINE = MergeTree() ORDER BY tuple() SETTINGS storage_policy='only_jbod2' - """.format(name=name_temp, engine=engine)) + """.format(name=name_temp)) data = [] # 35MB in total for i in range(35): @@ -298,10 +298,10 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): node1.query(""" CREATE TABLE {name} ( s1 String - ) ENGINE = {engine} + ) ENGINE = MergeTree() ORDER BY tuple() SETTINGS storage_policy='only_jbod2' - """.format(name=name_temp, engine=engine)) + """.format(name=name_temp)) data = [] # 35MB in total for i in range(35): From 164360e7addcb3c90ba6f0c42f6b570f5b4077e6 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 14:02:54 +0300 Subject: [PATCH 35/51] Fixed `test_ttl_move` again. --- dbms/tests/integration/test_ttl_move/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index ac4d3bf516e..85d9eb894af 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -134,7 +134,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((p, "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row + data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) @@ -172,7 +172,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine): for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((p, "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row + data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) From 15fb64f54b4aac9ab849e323ff22f6a357b55867 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Nov 2019 22:58:21 +0300 Subject: [PATCH 36/51] Fixed move TTL expiration logic. 
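
A move TTL rule is in action once its timestamp is already in the past at
the moment of the move, i.e. `ttl_info.min <= minimum_time`; the old
`min >= minimum_time` check selected exactly the rules that had not expired
yet. A worked example (values made up):

    minimum_time = 1000   # the moment the move is considered
    ttl_info.min = 900    # expired: 900 <= 1000, the rule may fire
    ttl_info.min = 1100   # not expired, yet the old check (1100 >= 1000) fired
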
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 71f81034419..f7b484e5886 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3734,7 +3734,7 @@ const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( { auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); if (ttl_info_it != ttl_infos.moves_ttl.end() - && ttl_info_it->second.min >= minimum_time + && ttl_info_it->second.min <= minimum_time && max_min_ttl <= ttl_info_it->second.min) { result = &ttl_entry; From 8ed37801ca869b6754a0b047ceec28cc7d1cbefb Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 29 Nov 2019 08:12:01 +0300 Subject: [PATCH 37/51] Finally fixed selection of destination of move TTL. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f7b484e5886..03dfa8a64d8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3728,17 +3728,17 @@ const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( { const MergeTreeData::TTLEntry * result = nullptr; /// Prefer TTL rule which went into action last. - time_t max_min_ttl = 0; + time_t max_max_ttl = 0; for (const auto & ttl_entry : move_ttl_entries) { auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); if (ttl_info_it != ttl_infos.moves_ttl.end() - && ttl_info_it->second.min <= minimum_time - && max_min_ttl <= ttl_info_it->second.min) + && ttl_info_it->second.max <= minimum_time + && max_max_ttl >= ttl_info_it->second.max) { result = &ttl_entry; - max_min_ttl = ttl_info_it->second.min; + max_max_ttl = ttl_info_it->second.max; } } From f72da4ab0c1347e2ab89ddd5bd93925b6f5a0ef2 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 29 Nov 2019 08:41:09 +0300 Subject: [PATCH 38/51] Removing expired rows shall not be triggered on move TTL expiration. 
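
part_min_ttl and part_max_ttl are consumed by the merge selector when it
picks parts whose expired rows should be physically removed. Feeding them
from move TTL rules made the background merger schedule row-deleting merges
when only a relocation was due; updateTTL() therefore gains an
update_part_min_max_ttls flag, and move TTL entries no longer touch the pair.
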
--- .../Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp | 3 --- .../src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h | 3 +++ dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 8075f8f62d4..05598c00aa7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -19,9 +19,6 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i for (const auto & [expression, ttl_info] : other_infos.moves_ttl) { moves_ttl[expression].update(ttl_info); - updatePartMinMaxTTL(ttl_info.min, ttl_info.max); - /// FIXME: Possibly one need another logic here, because move TTL may spoil part min/max TTL - /// in this case we also need to skip updating part min/max in `updateTTL` method } table_ttl.update(other_infos.table_ttl); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index e53f49f5205..0a96a483263 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -35,6 +35,9 @@ struct MergeTreeDataPartTTLInfos { std::unordered_map columns_ttl; MergeTreeDataPartTTLInfo table_ttl; + + /// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts + /// to merge in order to remove expired rows. time_t part_min_ttl = 0; time_t part_max_ttl = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d83d95da564..187f7aa3439 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -78,7 +78,7 @@ void buildScatterSelector( void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, - Block & block) + Block & block, bool update_part_min_max_ttls) { bool remove_column = false; if (!block.has(ttl_entry.result_column)) @@ -118,7 +118,8 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, else throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); - ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); + if (update_part_min_max_ttls) + ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); if (remove_column) { @@ -227,7 +228,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa DB::MergeTreeDataPart::TTLInfos move_ttl_infos; for (const auto & ttl_entry : data.move_ttl_entries) - updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block); + updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr)); @@ -280,10 +281,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa } if (data.hasTableTTL()) - updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block); + updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); for (const auto & [name, ttl_entry] : data.column_ttl_entries_by_name) - updateTTL(ttl_entry, new_data_part->ttl_infos, 
new_data_part->ttl_infos.columns_ttl[name], block);
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
 
     new_data_part->ttl_infos.update(move_ttl_infos);

From 618a39cc8cf6c365a76112a99ac362b7967ce401 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev
Date: Fri, 29 Nov 2019 10:00:43 +0300
Subject: [PATCH 39/51] Don't move anything if a part already belongs to its
 destination.

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 149 +++++++++---------
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  15 +-
 .../MergeTree/MergeTreePartsMover.cpp         |  20 ++-
 dbms/src/Storages/StorageMergeTree.cpp        |   2 +-
 .../Storages/StorageReplicatedMergeTree.cpp   |   2 +-
 5 files changed, 98 insertions(+), 90 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 03dfa8a64d8..f8998823a05 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3153,73 +3153,95 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) cons
 
 DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size,
                                                                                const MergeTreeDataPart::TTLInfos & ttl_infos,
-                                                                               time_t minimum_time) const
+                                                                               time_t time_of_move) const
 {
     expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
 
-    auto reservation = tryReserveSpaceOnMoveDestination(expected_size, ttl_infos, minimum_time);
-    if (reservation)
-        return reservation;
+    DiskSpace::ReservationPtr reservation;
+
+    auto ttl_entry = selectMoveDestination(ttl_infos, time_of_move);
+    if (ttl_entry != nullptr)
+    {
+        DiskSpace::SpacePtr destination_ptr = ttl_entry->getDestination(storage_policy);
+        if (!destination_ptr)
+        {
+            if (ttl_entry->destination_type == PartDestinationType::VOLUME)
+                LOG_WARNING(log, "Would like to reserve space on volume '"
+                    << ttl_entry->destination_name << "' by TTL rule of table '"
+                    << log_name << "' but volume was not found");
+            else if (ttl_entry->destination_type == PartDestinationType::DISK)
+                LOG_WARNING(log, "Would like to reserve space on disk '"
+                    << ttl_entry->destination_name << "' by TTL rule of table '"
+                    << log_name << "' but disk was not found");
+        }
+        else
+        {
+            reservation = destination_ptr->reserve(expected_size);
+            if (reservation)
+                return reservation;
+        }
+    }
 
     reservation = storage_policy->reserve(expected_size);
 
     return returnReservationOrThrowError(expected_size, std::move(reservation));
 }
 
-DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceOnMoveDestination(UInt64 expected_size,
-                                                                          const MergeTreeDataPart::TTLInfos & ttl_infos,
-                                                                          time_t minimum_time) const
+DiskSpace::ReservationPtr MergeTreeData::reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const
 {
     expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
 
-    auto ttl_entry = selectMoveDestination(ttl_infos, minimum_time);
-    if (ttl_entry != nullptr)
-    {
-        DiskSpace::ReservationPtr reservation;
-        if (ttl_entry->destination_type == PartDestinationType::VOLUME)
-        {
-            auto volume_ptr = storage_policy->getVolumeByName(ttl_entry->destination_name);
-            if (volume_ptr)
-            {
-                reservation = volume_ptr->reserve(expected_size);
-            }
-            else
-            {
-                LOG_WARNING(log, "Would like to reserve space on volume '"
-                    << ttl_entry->destination_name << "' by TTL rule of table '"
-                    << log_name << "' but volume was not found");
-            }
-        }
-        else if (ttl_entry->destination_type == PartDestinationType::DISK)
-        {
-            auto disk_ptr = 
storage_policy->getDiskByName(ttl_entry->destination_name); - if (disk_ptr) - { - reservation = disk_ptr->reserve(expected_size); - } - else - { - LOG_WARNING(log, "Would like to reserve space on disk '" - << ttl_entry->destination_name << "' by TTL rule of table '" - << log_name << "' but disk was not found"); - } - } - if (reservation) - return reservation; - } - - return {}; -} - -DiskSpace::ReservationPtr MergeTreeData::reserveSpaceOnSpecificDisk(UInt64 expected_size, DiskSpace::DiskPtr disk) const -{ - expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - - auto reservation = disk->reserve(expected_size); + auto reservation = space->reserve(expected_size); return returnReservationOrThrowError(expected_size, std::move(reservation)); } +DiskSpace::SpacePtr MergeTreeData::TTLEntry::getDestination(const DiskSpace::StoragePolicyPtr & storage_policy) const +{ + if (destination_type == PartDestinationType::VOLUME) + return storage_policy->getVolumeByName(destination_name); + else if (destination_type == PartDestinationType::DISK) + return storage_policy->getDiskByName(destination_name); + else + return {}; +} + +bool MergeTreeData::TTLEntry::isPartInDestination(const DiskSpace::StoragePolicyPtr & storage_policy, const MergeTreeDataPart & part) const +{ + if (destination_type == PartDestinationType::VOLUME) + { + for (const auto & disk : storage_policy->getVolumeByName(destination_name)->disks) + if (disk->getName() == part.disk->getName()) + return true; + } + else if (destination_type == PartDestinationType::DISK) + return storage_policy->getDiskByName(destination_name)->getName() == part.disk->getName(); + return false; +} + +const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move) const +{ + const MergeTreeData::TTLEntry * result = nullptr; + /// Prefer TTL rule which went into action last. + time_t max_max_ttl = 0; + + for (const auto & ttl_entry : move_ttl_entries) + { + auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); + if (ttl_info_it != ttl_infos.moves_ttl.end() + && ttl_info_it->second.max <= time_of_move + && max_max_ttl >= ttl_info_it->second.max) + { + result = &ttl_entry; + max_max_ttl = ttl_info_it->second.max; + } + } + + return result; +} + MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const { DataParts res; @@ -3399,7 +3421,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( String dst_part_name = src_part->getNewName(dst_part_info); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; - auto reservation = reserveSpaceOnSpecificDisk(src_part->bytes_on_disk, src_part->disk); + auto reservation = reserveSpaceInSpecificSpace(src_part->bytes_on_disk, src_part->disk); String dst_part_path = getFullPathOnDisk(reservation->getDisk()); Poco::Path dst_part_absolute_path = Poco::Path(dst_part_path + tmp_dst_part_name).absolute(); Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute(); @@ -3722,27 +3744,4 @@ bool MergeTreeData::moveParts(CurrentlyMovingPartsTagger && moving_tagger) return true; } -const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( - const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t minimum_time) const -{ - const MergeTreeData::TTLEntry * result = nullptr; - /// Prefer TTL rule which went into action last. 
- time_t max_max_ttl = 0; - - for (const auto & ttl_entry : move_ttl_entries) - { - auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); - if (ttl_info_it != ttl_infos.moves_ttl.end() - && ttl_info_it->second.max <= minimum_time - && max_max_ttl >= ttl_info_it->second.max) - { - result = &ttl_entry; - max_max_ttl = ttl_info_it->second.max; - } - } - - return result; -} - } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index e6f1ac85fc3..eb52e4cea4a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -678,11 +678,8 @@ public: DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size) const; DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size, const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t minimum_time) const; - DiskSpace::ReservationPtr tryReserveSpaceOnMoveDestination(UInt64 expected_size, - const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t minimum_time) const; - DiskSpace::ReservationPtr reserveSpaceOnSpecificDisk(UInt64 expected_size, DiskSpace::DiskPtr disk) const; + time_t time_of_move) const; + DiskSpace::ReservationPtr reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const; /// Choose disk with max available free space /// Reserves 0 bytes @@ -733,8 +730,13 @@ public: String destination_name; ASTPtr entry_ast; + + DiskSpace::SpacePtr getDestination(const DiskSpace::StoragePolicyPtr & storage_policy) const; + bool isPartInDestination(const DiskSpace::StoragePolicyPtr & storage_policy, const MergeTreeDataPart & part) const; }; + const TTLEntry * selectMoveDestination(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; + using TTLEntriesByName = std::unordered_map; TTLEntriesByName column_ttl_entries_by_name; @@ -978,9 +980,6 @@ private: /// Check selected parts for movements. Used by ALTER ... MOVE queries. CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, DiskSpace::SpacePtr space); - - const MergeTreeData::TTLEntry * selectMoveDestination(const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t minimum_time) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 5772ff6079f..60a6cbdd3f2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -114,15 +114,25 @@ bool MergeTreePartsMover::selectPartsForMove( } } + time_t time_of_move = time(nullptr); + for (const auto & part : data_parts) { String reason; - /// Don't report message to log, because logging is excessive + /// Don't report message to log, because logging is excessive. 
if (!can_move(part, &reason)) continue; - auto reservation = part->storage.tryReserveSpaceOnMoveDestination(part->bytes_on_disk, part->ttl_infos, time(nullptr)); + const MergeTreeData::TTLEntry * ttl_entry_ptr = part->storage.selectMoveDestination(part->ttl_infos, time_of_move); auto to_insert = need_to_move.find(part->disk); + DiskSpace::ReservationPtr reservation; + if (ttl_entry_ptr) + { + auto destination = ttl_entry_ptr->getDestination(policy); + if (destination && !ttl_entry_ptr->isPartInDestination(policy, *part)) + reservation = part->storage.reserveSpaceInSpecificSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy)); + } + if (reservation) { parts_to_move.emplace_back(part, std::move(reservation)); @@ -149,9 +159,9 @@ bool MergeTreePartsMover::selectPartsForMove( auto reservation = policy->reserve(part->bytes_on_disk, min_volume_index); if (!reservation) { - /// Next parts to move from this disk has greater size and same min volume index - /// There are no space for them - /// But it can be possible to move data from other disks + /// Next parts to move from this disk has greater size and same min volume index. + /// There are no space for them. + /// But it can be possible to move data from other disks. break; } parts_to_move.emplace_back(part, std::move(reservation)); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0de70784328..a8c91a58c60 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -350,7 +350,7 @@ public: /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) - reserved_space = storage.reserveSpaceOnSpecificDisk(total_size, future_part_.parts[0]->disk); + reserved_space = storage.reserveSpaceInSpecificSpace(total_size, future_part_.parts[0]->disk); else { MergeTreeDataPart::TTLInfos ttl_infos; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b1e2fe01a56..cad4dacdfa8 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1147,7 +1147,7 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. - DiskSpace::ReservationPtr reserved_space = reserveSpaceOnSpecificDisk(estimated_space_for_result, source_part->disk); + DiskSpace::ReservationPtr reserved_space = reserveSpaceInSpecificSpace(estimated_space_for_result, source_part->disk); auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); From f83b28a1c1d025f0858dc5e05eab1acc13946620 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 29 Nov 2019 10:21:44 +0300 Subject: [PATCH 40/51] Fixed a typo in `MergeTreeData::selectMoveDestination()`. 
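
The earlier fix flipped the comparison to `max_max_ttl >= ttl_info.max`, and
the refactoring carried it over. Since max_max_ttl starts at 0 and real TTL
timestamps are positive, that condition could never hold, so
selectMoveDestination() never picked any rule at all. Restoring
`max_max_ttl <= ttl_info.max` again keeps the entry with the largest max,
i.e. the rule that went into action last.
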
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f8998823a05..a05910e313c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3232,7 +3232,7 @@ const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination( auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry.result_column); if (ttl_info_it != ttl_infos.moves_ttl.end() && ttl_info_it->second.max <= time_of_move - && max_max_ttl >= ttl_info_it->second.max) + && max_max_ttl <= ttl_info_it->second.max) { result = &ttl_entry; max_max_ttl = ttl_info_it->second.max; From 67aa268bbbd280be2f3414d9d05da660d3b8201e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 30 Nov 2019 22:22:01 +0300 Subject: [PATCH 41/51] Finally fixed tests and logic for extended TTL syntax. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 22 +++- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 + .../MergeTree/MergeTreePartsMover.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 4 +- .../config.d/storage_configuration.xml | 12 ++ dbms/tests/integration/test_ttl_move/test.py | 124 +++++++++++++----- 6 files changed, 128 insertions(+), 40 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a05910e313c..4d659acc15a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3157,6 +3157,17 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(U { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + DiskSpace::ReservationPtr reservation = tryReserveSpacePreferringMoveDestination(expected_size, ttl_infos, time_of_move); + + return returnReservationOrThrowError(expected_size, std::move(reservation)); +} + +DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestination(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + DiskSpace::ReservationPtr reservation; auto ttl_entry = selectMoveDestination(ttl_infos, time_of_move); @@ -3184,18 +3195,25 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(U reservation = storage_policy->reserve(expected_size); - return returnReservationOrThrowError(expected_size, std::move(reservation)); + return reservation; } DiskSpace::ReservationPtr MergeTreeData::reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - auto reservation = space->reserve(expected_size); + auto reservation = tryReserveSpaceInSpecificSpace(expected_size, space); return returnReservationOrThrowError(expected_size, std::move(reservation)); } +DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + return space->reserve(expected_size); +} + DiskSpace::SpacePtr MergeTreeData::TTLEntry::getDestination(const DiskSpace::StoragePolicyPtr & storage_policy) const { if (destination_type == PartDestinationType::VOLUME) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 
eb52e4cea4a..47093da2ccc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -679,7 +679,11 @@ public:
     DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size,
                                                                     const MergeTreeDataPart::TTLInfos & ttl_infos,
                                                                     time_t time_of_move) const;
+    DiskSpace::ReservationPtr tryReserveSpacePreferringMoveDestination(UInt64 expected_size,
+                                                                       const MergeTreeDataPart::TTLInfos & ttl_infos,
+                                                                       time_t time_of_move) const;
     DiskSpace::ReservationPtr reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
+    DiskSpace::ReservationPtr tryReserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
 
     /// Choose disk with max available free space
     /// Reserves 0 bytes
diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp
index 60a6cbdd3f2..de987acac72 100644
--- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -130,7 +130,7 @@ bool MergeTreePartsMover::selectPartsForMove(
         {
             auto destination = ttl_entry_ptr->getDestination(policy);
             if (destination && !ttl_entry_ptr->isPartInDestination(policy, *part))
-                reservation = part->storage.reserveSpaceInSpecificSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy));
+                reservation = part->storage.tryReserveSpaceInSpecificSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy));
 
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index a8c91a58c60..4e4bea7b023 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -350,7 +350,7 @@ public:
 
         /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks
         if (is_mutation)
-            reserved_space = storage.reserveSpaceInSpecificSpace(total_size, future_part_.parts[0]->disk);
+            reserved_space = storage.tryReserveSpaceInSpecificSpace(total_size, future_part_.parts[0]->disk);
         else
         {
             MergeTreeDataPart::TTLInfos ttl_infos;
@@ -358,7 +358,7 @@ public:
             {
                 ttl_infos.update(part_ptr->ttl_infos);
             }
-            reserved_space = storage.reserveSpacePreferringMoveDestination(total_size, ttl_infos, time(nullptr));
+            reserved_space = storage.tryReserveSpacePreferringMoveDestination(total_size, ttl_infos, time(nullptr));
         }
         if (!reserved_space)
         {
diff --git a/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml b/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
index 454b78ec216..b48de85007a 100644
--- a/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
+++ b/dbms/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml
@@ -28,6 +28,18 @@
+        <jbods_with_external>
+            <volumes>
+                <main>
+                    <disk>jbod1</disk>
+                    <disk>jbod2</disk>
+                </main>
+                <external>
+                    <disk>external</disk>
+                </external>
+            </volumes>
+        </jbods_with_external>
+
diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 85d9eb894af..f35c5409841 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -3,6 +3,7 @@ import pytest import random import re import string +import threading import time from multiprocessing.dummy import Pool from helpers.client import QueryRuntimeException @@ -38,7 +39,11 @@ def started_cluster(): def get_random_string(length): - return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length)) + symbols = bytes(string.ascii_uppercase + string.digits) + result_list = bytearray([0])*length + for i in range(length): + result_list[i] = random.choice(symbols) + return str(result_list) def get_used_disks_for_table(node, table_name): @@ -65,7 +70,7 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): data = [] # 10MB in total for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2 if i > 0 or positive else time.time()+2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-1 if i > 0 or positive else time.time()+300))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) @@ -95,15 +100,25 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): SETTINGS storage_policy='small_jbod_with_external' """.format(name=name, engine=engine)) + wait_expire_1 = 6 + wait_expire_2 = 4 + time_1 = time.time() + wait_expire_1 + time_2 = time.time() + wait_expire_1 + wait_expire_2 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + data = [] # 10MB in total for i in range(10): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+6))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1 if i > 0 or positive else time_2))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} - time.sleep(4) + wait_expire_1_thread.join() + time.sleep(wait_expire_2/2) + used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external" if positive else "jbod1"} @@ -115,7 +130,7 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_volume_work","MergeTree()"), - ("replicated_mt_test_moves_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_volume_work', '1')",), + ("replicated_mt_test_moves_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_volume_work', '1')"), ]) def test_moves_to_volume_work(started_cluster, name, engine): try: @@ -127,21 +142,29 @@ def test_moves_to_volume_work(started_cluster, name, engine): ) ENGINE = {engine} ORDER BY tuple() PARTITION BY p1 - TTL d1 TO VOLUME 'main' - SETTINGS storage_policy='external_with_jbods' + TTL d1 TO VOLUME 'external' + SETTINGS storage_policy='jbods_with_external' """.format(name=name, engine=engine)) + wait_expire_1 = 10 + time_1 = time.time() + wait_expire_1 + + wait_expire_1_thread = 
threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row + data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1))) # 1MB row node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {'jbod1', 'jbod2'} - time.sleep(4) + wait_expire_1_thread.join() + time.sleep(1) + used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} @@ -151,11 +174,13 @@ def test_moves_to_volume_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.parametrize("name,engine", [ - ("mt_test_inserts_to_volume_work","MergeTree()"), - ("replicated_mt_test_inserts_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_work', '1')",), +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_inserts_to_volume_do_not_work","MergeTree()",0), + ("replicated_mt_test_inserts_to_volume_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_do_not_work', '1')",0), + ("mt_test_inserts_to_volume_work","MergeTree()",1), + ("replicated_mt_test_inserts_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_work', '1')",1), ]) -def test_inserts_to_volume_work(started_cluster, name, engine): +def test_inserts_to_volume_work(started_cluster, name, engine, positive): try: node1.query(""" CREATE TABLE {name} ( @@ -165,19 +190,21 @@ def test_inserts_to_volume_work(started_cluster, name, engine): ) ENGINE = {engine} ORDER BY tuple() PARTITION BY p1 - TTL d1 TO VOLUME 'main' - SETTINGS storage_policy='external_with_jbods' + TTL d1 TO VOLUME 'external' + SETTINGS storage_policy='small_jbod_with_external' """.format(name=name, engine=engine)) + node1.query("SYSTEM STOP MOVES {name}".format(name=name)) + for p in range(2): data = [] # 20MB in total for i in range(10): - data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row + data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-1 if i > 0 or positive else time.time()+300))) # 1MB row node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) - assert set(used_disks) == {"external"} + assert set(used_disks) == {"external" if positive else "jbod1"} assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "20" @@ -187,7 +214,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine): @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_disk_eventually_work","MergeTree()"), - ("replicated_mt_test_moves_to_disk_eventually_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_eventually_work', '1')",), + ("replicated_mt_test_moves_to_disk_eventually_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_eventually_work', '1')"), ]) def test_moves_to_disk_eventually_work(started_cluster, name, engine): try: @@ -221,7 +248,7 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): data = [] # 10MB in total for i in range(10): - 
data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-2))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()-1))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) @@ -261,18 +288,28 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): node1.query("SYSTEM STOP MERGES {}".format(name)) node1.query("SYSTEM STOP MOVES {}".format(name)) + wait_expire_1 = 10 + wait_expire_2 = 4 + time_1 = time.time() + wait_expire_1 + time_2 = time.time() + wait_expire_1 + wait_expire_2 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+7))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1 if i > 0 or positive else time_2))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) - time.sleep(4) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() + wait_expire_1_thread.join() + time.sleep(wait_expire_2/2) + node1.query("SYSTEM START MERGES {}".format(name)) node1.query("OPTIMIZE TABLE {}".format(name)) @@ -288,10 +325,10 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): @pytest.mark.parametrize("name,engine", [ - ("mt_test_merges_to_full_disk_work","MergeTree()"), - ("replicated_mt_test_merges_to_full_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_full_disk_work', '1')",), + ("mt_test_merges_with_full_disk_work","MergeTree()"), + ("replicated_mt_test_merges_with_full_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_with_full_disk_work', '1')"), ]) -def test_merges_to_full_disk_work(started_cluster, name, engine): +def test_merges_with_full_disk_work(started_cluster, name, engine): try: name_temp = name + "_temp" @@ -317,36 +354,42 @@ def test_merges_to_full_disk_work(started_cluster, name, engine): d1 DateTime ) ENGINE = {engine} ORDER BY tuple() - TTL d1 TO DISK 'external' - SETTINGS storage_policy='small_jbod_with_external' + TTL d1 TO DISK 'jbod2' + SETTINGS storage_policy='jbod1_with_jbod2' """.format(name=name, engine=engine)) - node1.query("SYSTEM STOP MOVES {}".format(name)) + wait_expire_1 = 10 + time_1 = time.time() + wait_expire_1 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() for _ in range(2): - data = [] # 16MB in total - for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2))) # 1MB row - + data = [] # 12MB in total + for i in range(6): + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} assert "2" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 
1".format(name)).strip() - time.sleep(4) + wait_expire_1_thread.join() + node1.query("OPTIMIZE TABLE {}".format(name)) + time.sleep(1) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} # Merged to the same disk against the rule. assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "16" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "12" finally: node1.query("DROP TABLE IF EXISTS {}".format(name_temp)) node1.query("DROP TABLE IF EXISTS {}".format(name)) + @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_after_merges_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",0), @@ -365,19 +408,30 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): SETTINGS storage_policy='small_jbod_with_external' """.format(name=name, engine=engine)) + wait_expire_1 = 10 + wait_expire_2 = 4 + time_1 = time.time() + wait_expire_1 + time_2 = time.time() + wait_expire_1 + wait_expire_2 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + for _ in range(2): data = [] # 16MB in total for i in range(8): - data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time.time()+2 if i > 0 or positive else time.time()+6))) # 1MB row + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1 if i > 0 or positive else time_2))) # 1MB row node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) node1.query("OPTIMIZE TABLE {}".format(name)) + time.sleep(1) + used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} assert "1" == node1.query("SELECT count() FROM system.parts WHERE table = '{}' AND active = 1".format(name)).strip() - time.sleep(4) + wait_expire_1_thread.join() + time.sleep(wait_expire_2/2) used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external" if positive else "jbod1"} From e3ed311273f8c9905a658148f852b3c2188c4795 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sun, 1 Dec 2019 08:52:53 +0300 Subject: [PATCH 42/51] Fixed Clang build. 
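
Presumably the `storage_policy` parameters of TTLEntry::getDestination() and
TTLEntry::isPartInDestination() shadowed the storage_policy member of the
enclosing MergeTreeData, which Clang (with warnings treated as errors)
rejects while GCC accepts; renaming the parameters to `policy` sidesteps the
warning without changing behaviour.
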
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 12 ++++++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 4d659acc15a..fa3e9112297 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3214,26 +3214,26 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceInSpecificSpace(UInt64 e return space->reserve(expected_size); } -DiskSpace::SpacePtr MergeTreeData::TTLEntry::getDestination(const DiskSpace::StoragePolicyPtr & storage_policy) const +DiskSpace::SpacePtr MergeTreeData::TTLEntry::getDestination(const DiskSpace::StoragePolicyPtr & policy) const { if (destination_type == PartDestinationType::VOLUME) - return storage_policy->getVolumeByName(destination_name); + return policy->getVolumeByName(destination_name); else if (destination_type == PartDestinationType::DISK) - return storage_policy->getDiskByName(destination_name); + return policy->getDiskByName(destination_name); else return {}; } -bool MergeTreeData::TTLEntry::isPartInDestination(const DiskSpace::StoragePolicyPtr & storage_policy, const MergeTreeDataPart & part) const +bool MergeTreeData::TTLEntry::isPartInDestination(const DiskSpace::StoragePolicyPtr & policy, const MergeTreeDataPart & part) const { if (destination_type == PartDestinationType::VOLUME) { - for (const auto & disk : storage_policy->getVolumeByName(destination_name)->disks) + for (const auto & disk : policy->getVolumeByName(destination_name)->disks) if (disk->getName() == part.disk->getName()) return true; } else if (destination_type == PartDestinationType::DISK) - return storage_policy->getDiskByName(destination_name)->getName() == part.disk->getName(); + return policy->getDiskByName(destination_name)->getName() == part.disk->getName(); return false; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 47093da2ccc..3525e38d2bc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -735,8 +735,8 @@ public: ASTPtr entry_ast; - DiskSpace::SpacePtr getDestination(const DiskSpace::StoragePolicyPtr & storage_policy) const; - bool isPartInDestination(const DiskSpace::StoragePolicyPtr & storage_policy, const MergeTreeDataPart & part) const; + DiskSpace::SpacePtr getDestination(const DiskSpace::StoragePolicyPtr & policy) const; + bool isPartInDestination(const DiskSpace::StoragePolicyPtr & policy, const MergeTreeDataPart & part) const; }; const TTLEntry * selectMoveDestination(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; From 065b681a8c588df2a8c8b78531e1729c6a3fd6cd Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sun, 1 Dec 2019 09:04:06 +0300 Subject: [PATCH 43/51] Boosted `test_ttl_move::test_moves_to_volume_work` a little. 
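
Inserting two partitions of five 1MB rows instead of ten halves the slowest
part of the test; the expected row count drops from 20 to 10 accordingly.
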
--- dbms/tests/integration/test_ttl_move/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index f35c5409841..129a1d411bd 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -153,8 +153,8 @@ def test_moves_to_volume_work(started_cluster, name, engine): wait_expire_1_thread.start() for p in range(2): - data = [] # 20MB in total - for i in range(10): + data = [] # 10MB in total + for i in range(5): data.append((str(p), "'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1))) # 1MB row node1.query("INSERT INTO {} (p1, s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) @@ -168,7 +168,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} - assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "20" + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) From 7fd1668fde8c25db251a236084cc2a2c02bbfb79 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 5 Dec 2019 11:05:07 +0300 Subject: [PATCH 44/51] Review fixes. --- dbms/src/Interpreters/Context.cpp | 15 +++++- .../MergeTree/BackgroundProcessingPool.cpp | 27 ++++------ .../MergeTree/BackgroundProcessingPool.h | 27 ++++++---- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 54 ++++++++++--------- dbms/src/Storages/MergeTree/MergeTreeData.h | 15 ++++-- .../MergeTree/MergeTreePartsMover.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- .../configs/config.d/instant_moves.xml | 4 +- 9 files changed, 84 insertions(+), 64 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index e8ba306aecb..9d54bc285e3 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1500,7 +1500,7 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool.emplace(settings.background_pool_size, getConfigRef()); + shared->background_pool.emplace(settings.background_pool_size); return *shared->background_pool; } @@ -1508,7 +1508,18 @@ BackgroundProcessingPool & Context::getBackgroundMovePool() { auto lock = getLock(); if (!shared->background_move_pool) - shared->background_move_pool.emplace(settings.background_move_pool_size, getConfigRef(), "BackgroundMovePool", "BgMoveProcPool"); + { + BackgroundProcessingPool::PoolSettings pool_settings; + auto & config = getConfigRef(); + pool_settings.thread_sleep_seconds = config.getDouble("background_move_processing_pool_thread_sleep_seconds", 10); + pool_settings.thread_sleep_seconds_random_part = config.getDouble("background_move_processing_pool_thread_sleep_seconds_random_part", 1.0); + pool_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1); + pool_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_min", 10); + pool_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_max", 600); + 
pool_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1); + pool_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0); + shared->background_move_pool.emplace(settings.background_move_pool_size, pool_settings, "BackgroundMovePool", "BgMoveProcPool"); + } return *shared->background_move_pool; } diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index 9a4aa1d9dca..44b83399afc 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -51,23 +51,16 @@ void BackgroundProcessingPoolTaskInfo::wake() BackgroundProcessingPool::BackgroundProcessingPool(int size_, - const Poco::Util::AbstractConfiguration & config, + const PoolSettings & pool_settings, const char * log_name, const char * thread_name_) : size(size_) , thread_name(thread_name_) + , settings(pool_settings) { logger = &Logger::get(log_name); LOG_INFO(logger, "Create " << log_name << " with " << size << " threads"); - thread_sleep_seconds = config.getDouble("background_processing_pool_thread_sleep_seconds", 10); - thread_sleep_seconds_random_part = config.getDouble("background_processing_pool_thread_sleep_seconds_random_part", 1.0); - thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1); - task_sleep_seconds_when_no_work_min = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10); - task_sleep_seconds_when_no_work_max = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600); - task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1); - task_sleep_seconds_when_no_work_random_part = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0); - threads.resize(size); for (auto & thread : threads) thread = ThreadFromGlobalPool([this] { threadFunction(); }); @@ -147,7 +140,7 @@ void BackgroundProcessingPool::threadFunction() memory_tracker->setMetric(CurrentMetrics::MemoryTrackingInBackgroundProcessingPool); pcg64 rng(randomSeed()); - std::this_thread::sleep_for(std::chrono::duration(std::uniform_real_distribution(0, thread_sleep_seconds_random_part)(rng))); + std::this_thread::sleep_for(std::chrono::duration(std::uniform_real_distribution(0, settings.thread_sleep_seconds_random_part)(rng))); while (!shutdown) { @@ -182,8 +175,8 @@ void BackgroundProcessingPool::threadFunction() { std::unique_lock lock(tasks_mutex); wake_event.wait_for(lock, - std::chrono::duration(thread_sleep_seconds - + std::uniform_real_distribution(0, thread_sleep_seconds_random_part)(rng))); + std::chrono::duration(settings.thread_sleep_seconds + + std::uniform_real_distribution(0, settings.thread_sleep_seconds_random_part)(rng))); continue; } @@ -193,7 +186,7 @@ void BackgroundProcessingPool::threadFunction() { std::unique_lock lock(tasks_mutex); wake_event.wait_for(lock, std::chrono::microseconds( - min_time - current_time + std::uniform_int_distribution(0, thread_sleep_seconds_random_part * 1000000)(rng))); + min_time - current_time + std::uniform_int_distribution(0, settings.thread_sleep_seconds_random_part * 1000000)(rng))); } std::shared_lock 
rlock(task->rwlock); @@ -231,11 +224,11 @@ void BackgroundProcessingPool::threadFunction() Poco::Timestamp next_time_to_execute; /// current time if (task_result == TaskResult::ERROR) next_time_to_execute += 1000000 * (std::min( - task_sleep_seconds_when_no_work_max, - task_sleep_seconds_when_no_work_min * std::pow(task_sleep_seconds_when_no_work_multiplier, task->count_no_work_done)) - + std::uniform_real_distribution(0, task_sleep_seconds_when_no_work_random_part)(rng)); + settings.task_sleep_seconds_when_no_work_max, + settings.task_sleep_seconds_when_no_work_min * std::pow(settings.task_sleep_seconds_when_no_work_multiplier, task->count_no_work_done)) + + std::uniform_real_distribution(0, settings.task_sleep_seconds_when_no_work_random_part)(rng)); else if (task_result == TaskResult::NOTHING_TO_DO) - next_time_to_execute += 1000000 * thread_sleep_seconds_if_nothing_to_do; + next_time_to_execute += 1000000 * settings.thread_sleep_seconds_if_nothing_to_do; tasks.erase(task->iterator); task->iterator = tasks.emplace(next_time_to_execute, task); diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 9929e380f25..619e267ffe5 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -47,8 +47,23 @@ public: using TaskHandle = std::shared_ptr; + struct PoolSettings + { + double thread_sleep_seconds = 10; + double thread_sleep_seconds_random_part = 1.0; + double thread_sleep_seconds_if_nothing_to_do = 0.1; + + /// For exponential backoff. + double task_sleep_seconds_when_no_work_min = 10; + double task_sleep_seconds_when_no_work_max = 600; + double task_sleep_seconds_when_no_work_multiplier = 1.1; + double task_sleep_seconds_when_no_work_random_part = 1.0; + + PoolSettings() noexcept {} + }; + BackgroundProcessingPool(int size_, - const Poco::Util::AbstractConfiguration & config, + const PoolSettings & pool_settings = {}, const char * log_name = "BackgroundProcessingPool", const char * thread_name_ = "BackgrProcPool"); @@ -88,15 +103,7 @@ protected: void threadFunction(); private: - double thread_sleep_seconds; - double thread_sleep_seconds_random_part; - double thread_sleep_seconds_if_nothing_to_do; - - /// For exponential backoff. 
- double task_sleep_seconds_when_no_work_min; - double task_sleep_seconds_when_no_work_max; - double task_sleep_seconds_when_no_work_multiplier; - double task_sleep_seconds_when_no_work_random_part; + PoolSettings settings; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index fa3e9112297..e3cd882f47c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -572,15 +572,17 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new_column_ttls, const ASTPtr & new_ttl_table_ast, bool only_check) { - auto create_ttl_entry = [this](ASTPtr ttl_ast) -> TTLEntry + auto create_ttl_entry = [this](ASTPtr ttl_ast) { + TTLEntry result; + auto syntax_result = SyntaxAnalyzer(global_context).analyze(ttl_ast, getColumns().getAllPhysical()); - auto expr = ExpressionAnalyzer(ttl_ast, syntax_result, global_context).getActions(false); + result.expression = ExpressionAnalyzer(ttl_ast, syntax_result, global_context).getActions(false); + result.destination_type = PartDestinationType::DELETE; + result.result_column = ttl_ast->getColumnName(); - String result_column = ttl_ast->getColumnName(); - checkTTLExpression(expr, result_column); - - return {expr, result_column, PartDestinationType::DELETE, {}, {}}; + checkTTLExpression(result.expression, result.result_column); + return result; }; if (!new_column_ttls.empty()) @@ -3131,7 +3133,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const namespace { -inline DiskSpace::ReservationPtr returnReservationOrThrowError(UInt64 expected_size, DiskSpace::ReservationPtr reservation) +inline DiskSpace::ReservationPtr checkAndReturnReservation(UInt64 expected_size, DiskSpace::ReservationPtr reservation) { if (reservation) return reservation; @@ -3148,7 +3150,23 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) cons auto reservation = storage_policy->reserve(expected_size); - return returnReservationOrThrowError(expected_size, std::move(reservation)); + return checkAndReturnReservation(expected_size, std::move(reservation)); +} + +DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + auto reservation = tryReserveSpace(expected_size, space); + + return checkAndReturnReservation(expected_size, std::move(reservation)); +} + +DiskSpace::ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const +{ + expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + + return space->reserve(expected_size); } DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size, @@ -3159,7 +3177,7 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(U DiskSpace::ReservationPtr reservation = tryReserveSpacePreferringMoveDestination(expected_size, ttl_infos, time_of_move); - return returnReservationOrThrowError(expected_size, std::move(reservation)); + return checkAndReturnReservation(expected_size, std::move(reservation)); } DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestination(UInt64 expected_size, @@ -3198,22 +3216,6 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestinatio return reservation; } 
-DiskSpace::ReservationPtr MergeTreeData::reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const
-{
-    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
-
-    auto reservation = tryReserveSpaceInSpecificSpace(expected_size, space);
-
-    return returnReservationOrThrowError(expected_size, std::move(reservation));
-}
-
-DiskSpace::ReservationPtr MergeTreeData::tryReserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const
-{
-    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
-
-    return space->reserve(expected_size);
-}
-
 DiskSpace::SpacePtr MergeTreeData::TTLEntry::getDestination(const DiskSpace::StoragePolicyPtr & policy) const
 {
     if (destination_type == PartDestinationType::VOLUME)
@@ -3439,7 +3441,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk(
     String dst_part_name = src_part->getNewName(dst_part_info);
     String tmp_dst_part_name = tmp_part_prefix + dst_part_name;

-    auto reservation = reserveSpaceInSpecificSpace(src_part->bytes_on_disk, src_part->disk);
+    auto reservation = reserveSpace(src_part->bytes_on_disk, src_part->disk);
     String dst_part_path = getFullPathOnDisk(reservation->getDisk());
     Poco::Path dst_part_absolute_path = Poco::Path(dst_part_path + tmp_dst_part_name).absolute();
     Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index 3525e38d2bc..561572929e6 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -674,17 +674,21 @@ public:
     using PathsWithDisks = std::vector;
     PathsWithDisks getDataPathsWithDisks() const;

-    /// Reserves space at least 1MB
+    /// Reserves space at least 1MB.
     DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size) const;
+
+    /// Reserves space at least 1MB on a specific disk or volume.
+    DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
+    DiskSpace::ReservationPtr tryReserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
+
+
+    /// Reserves space at least 1MB preferring best destination according to `ttl_infos`.
     DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size,
                                                                     const MergeTreeDataPart::TTLInfos & ttl_infos,
                                                                     time_t time_of_move) const;
     DiskSpace::ReservationPtr tryReserveSpacePreferringMoveDestination(UInt64 expected_size,
                                                                        const MergeTreeDataPart::TTLInfos & ttl_infos,
                                                                        time_t time_of_move) const;
-    DiskSpace::ReservationPtr reserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
-    DiskSpace::ReservationPtr tryReserveSpaceInSpecificSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const;
-
     /// Choose disk with max available free space
     /// Reserves 0 bytes
     DiskSpace::ReservationPtr makeEmptyReservationOnLargestDisk() { return storage_policy->makeEmptyReservationOnLargestDisk(); }
@@ -735,7 +739,10 @@ public:

     ASTPtr entry_ast;

+    /// Returns the destination disk or volume for this rule.
     DiskSpace::SpacePtr getDestination(const DiskSpace::StoragePolicyPtr & policy) const;
+
+    /// Checks if the given part already belongs to the destination disk or volume for this rule.
bool isPartInDestination(const DiskSpace::StoragePolicyPtr & policy, const MergeTreeDataPart & part) const; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index de987acac72..5d07ea1a8a2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -130,7 +130,7 @@ bool MergeTreePartsMover::selectPartsForMove( { auto destination = ttl_entry_ptr->getDestination(policy); if (destination && !ttl_entry_ptr->isPartInDestination(policy, *part)) - reservation = part->storage.tryReserveSpaceInSpecificSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy)); + reservation = part->storage.tryReserveSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy)); } if (reservation) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 4e4bea7b023..82f0263ef95 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -350,7 +350,7 @@ public: /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) - reserved_space = storage.tryReserveSpaceInSpecificSpace(total_size, future_part_.parts[0]->disk); + reserved_space = storage.tryReserveSpace(total_size, future_part_.parts[0]->disk); else { MergeTreeDataPart::TTLInfos ttl_infos; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index cad4dacdfa8..43f195039cb 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1147,7 +1147,7 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. - DiskSpace::ReservationPtr reserved_space = reserveSpaceInSpecificSpace(estimated_space_for_result, source_part->disk); + DiskSpace::ReservationPtr reserved_space = reserveSpace(estimated_space_for_result, source_part->disk); auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); diff --git a/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml b/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml index ac5005061e9..7b68c6946ca 100644 --- a/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml +++ b/dbms/tests/integration/test_ttl_move/configs/config.d/instant_moves.xml @@ -1,4 +1,4 @@ - 0.5 - 0.5 + 0.5 + 0.5 From e41deb584830ede37297f85f731ea65696d43422 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 5 Dec 2019 11:14:47 +0300 Subject: [PATCH 45/51] Added comment to `MergeTreePartsMover::selectPartsForMove()`. --- dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 5d07ea1a8a2..be11c540976 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -133,7 +133,7 @@ bool MergeTreePartsMover::selectPartsForMove( reservation = part->storage.tryReserveSpace(part->bytes_on_disk, ttl_entry_ptr->getDestination(policy)); } - if (reservation) + if (reservation) /// Found reservation by TTL rule. 
{ parts_to_move.emplace_back(part, std::move(reservation)); /// If table TTL rule satisfies on this part, won't apply policy rules on it. From f2aee640724767ddd1d13a70cf8defa167cf196c Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 5 Dec 2019 11:33:16 +0300 Subject: [PATCH 46/51] Minor style fix. --- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 561572929e6..a50010569fc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -681,7 +681,6 @@ public: DiskSpace::ReservationPtr reserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const; DiskSpace::ReservationPtr tryReserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const; - /// Reserves space at least 1MB preferring best destination according to `ttl_infos`. DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size, const MergeTreeDataPart::TTLInfos & ttl_infos, From 0ff0a52249b160ef0f5fb7c44aedf29cb59690ad Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 5 Dec 2019 14:15:47 +0300 Subject: [PATCH 47/51] Missed review fixes. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++----- dbms/src/Storages/MergeTree/MergeTreeData.h | 14 +++++++------- .../src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 +--- .../src/Storages/MergeTree/MergeTreePartsMover.cpp | 6 +++--- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index e3cd882f47c..9ebee832950 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3169,18 +3169,18 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, D return space->reserve(expected_size); } -DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size, +DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringTTLRules(UInt64 expected_size, const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - DiskSpace::ReservationPtr reservation = tryReserveSpacePreferringMoveDestination(expected_size, ttl_infos, time_of_move); + DiskSpace::ReservationPtr reservation = tryReserveSpacePreferringTTLRules(expected_size, ttl_infos, time_of_move); return checkAndReturnReservation(expected_size, std::move(reservation)); } -DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestination(UInt64 expected_size, +DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_size, const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const { @@ -3188,7 +3188,7 @@ DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestinatio DiskSpace::ReservationPtr reservation; - auto ttl_entry = selectMoveDestination(ttl_infos, time_of_move); + auto ttl_entry = selectTTLEntryForTTLInfos(ttl_infos, time_of_move); if (ttl_entry != nullptr) { DiskSpace::SpacePtr destination_ptr = ttl_entry->getDestination(storage_policy); @@ -3239,7 +3239,7 @@ bool MergeTreeData::TTLEntry::isPartInDestination(const DiskSpace::StoragePolicy return false; } -const MergeTreeData::TTLEntry * 
MergeTreeData::selectMoveDestination( +const MergeTreeData::TTLEntry * MergeTreeData::selectTTLEntryForTTLInfos( const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const { diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index a50010569fc..9aa6fc2878a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -682,12 +682,12 @@ public: DiskSpace::ReservationPtr tryReserveSpace(UInt64 expected_size, DiskSpace::SpacePtr space) const; /// Reserves space at least 1MB preferring best destination according to `ttl_infos`. - DiskSpace::ReservationPtr reserveSpacePreferringMoveDestination(UInt64 expected_size, - const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t time_of_move) const; - DiskSpace::ReservationPtr tryReserveSpacePreferringMoveDestination(UInt64 expected_size, - const MergeTreeDataPart::TTLInfos & ttl_infos, - time_t time_of_move) const; + DiskSpace::ReservationPtr reserveSpacePreferringTTLRules(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move) const; + DiskSpace::ReservationPtr tryReserveSpacePreferringTTLRules(UInt64 expected_size, + const MergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move) const; /// Choose disk with max available free space /// Reserves 0 bytes DiskSpace::ReservationPtr makeEmptyReservationOnLargestDisk() { return storage_policy->makeEmptyReservationOnLargestDisk(); } @@ -745,7 +745,7 @@ public: bool isPartInDestination(const DiskSpace::StoragePolicyPtr & policy, const MergeTreeDataPart & part) const; }; - const TTLEntry * selectMoveDestination(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; + const TTLEntry * selectTTLEntryForTTLInfos(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; using TTLEntriesByName = std::unordered_map; TTLEntriesByName column_ttl_entries_by_name; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 187f7aa3439..155e77a9d31 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -122,9 +122,7 @@ void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); if (remove_column) - { block.erase(ttl_entry.result_column); - } } } @@ -230,7 +228,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : data.move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); - DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringMoveDestination(expected_size, move_ttl_infos, time(nullptr)); + DiskSpace::ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, reservation->getDisk(), part_name, new_part_info); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index be11c540976..ff4e686b715 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -56,7 +56,7 @@ public: } /// Weaken requirements on size - void decreaseRequiredSize(UInt64 size_decrease) + void decreaseRequiredSizeAndRemoveRedundantParts(UInt64 size_decrease) { required_size_sum 
-= std::min(size_decrease, required_size_sum); removeRedundantElements(); @@ -123,7 +123,7 @@ bool MergeTreePartsMover::selectPartsForMove( if (!can_move(part, &reason)) continue; - const MergeTreeData::TTLEntry * ttl_entry_ptr = part->storage.selectMoveDestination(part->ttl_infos, time_of_move); + const MergeTreeData::TTLEntry * ttl_entry_ptr = part->storage.selectTTLEntryForTTLInfos(part->ttl_infos, time_of_move); auto to_insert = need_to_move.find(part->disk); DiskSpace::ReservationPtr reservation; if (ttl_entry_ptr) @@ -141,7 +141,7 @@ bool MergeTreePartsMover::selectPartsForMove( /// possibly to zero. if (to_insert != need_to_move.end()) { - to_insert->second.decreaseRequiredSize(part->bytes_on_disk); + to_insert->second.decreaseRequiredSizeAndRemoveRedundantParts(part->bytes_on_disk); } } else diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 82f0263ef95..cf4b175bbf3 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -358,7 +358,7 @@ public: { ttl_infos.update(part_ptr->ttl_infos); } - reserved_space = storage.tryReserveSpacePreferringMoveDestination(total_size, ttl_infos, time(nullptr)); + reserved_space = storage.tryReserveSpacePreferringTTLRules(total_size, ttl_infos, time(nullptr)); } if (!reserved_space) { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 43f195039cb..36ece00f53b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1011,7 +1011,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) { ttl_infos.update(part_ptr->ttl_infos); } - DiskSpace::ReservationPtr reserved_space = reserveSpacePreferringMoveDestination(estimated_space_for_merge, + DiskSpace::ReservationPtr reserved_space = reserveSpacePreferringTTLRules(estimated_space_for_merge, ttl_infos, time(nullptr)); auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); From fd9483efbea531db968bd432861d7bb1f990abd3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 6 Dec 2019 12:03:04 +0300 Subject: [PATCH 48/51] Just trigger CI --- dbms/src/Storages/StorageMergeTree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index cf4b175bbf3..2addbeb6f54 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -355,9 +355,8 @@ public: { MergeTreeDataPart::TTLInfos ttl_infos; for (auto & part_ptr : future_part_.parts) - { ttl_infos.update(part_ptr->ttl_infos); - } + reserved_space = storage.tryReserveSpacePreferringTTLRules(total_size, ttl_infos, time(nullptr)); } if (!reserved_space) From 2940aeff234ed5d9f371769aa1c259ddb7c80aa8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 6 Dec 2019 13:03:44 +0300 Subject: [PATCH 49/51] Fix style --- dbms/src/Storages/StorageMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 2addbeb6f54..99ad96405f1 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -356,7 +356,7 @@ public: MergeTreeDataPart::TTLInfos ttl_infos; for (auto & part_ptr : future_part_.parts) ttl_infos.update(part_ptr->ttl_infos); - + reserved_space = storage.tryReserveSpacePreferringTTLRules(total_size, ttl_infos, time(nullptr)); } if (!reserved_space) 
From 231a43aac56d37b4d6b99eb70eaa1359894c15f7 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 12 Dec 2019 02:49:05 +0300
Subject: [PATCH 50/51] Update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 258936d6b52..305021728a9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -128,6 +128,8 @@ Yu](https://github.com/yuzhichang))
 * Introduce CustomSeparated data format that supports custom escaping and
 delimiter rules. [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118)
 ([tavplubix](https://github.com/tavplubix))
+* Support Redis as a source of external dictionaries. [#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton
+Popov](https://github.com/CurtizJ))

 ### Bug Fix
 * Fix wrong query result if it has `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is

From 07f945b2316b0ed8fe5350fa02a35f54cba26b99 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Thu, 12 Dec 2019 03:37:16 +0300
Subject: [PATCH 51/51] Update extended_roadmap.md

---
 docs/ru/extended_roadmap.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md
index ca5390a5213..e22b91f39b2 100644
--- a/docs/ru/extended_roadmap.md
+++ b/docs/ru/extended_roadmap.md
@@ -645,6 +645,10 @@ Maxim Fedotov, Wargaming + Yuri Baranov, Yandex.
 When parsing a query, convert syntax of the form `@@version_full` into a call to the `getGlobalVariable('version_full')` function. Support popular MySQL variables. Yuri Baranov may help, if there is enthusiasm.

+### 8.23. Subscription for importing updated and rotated logs into the filesystem.
+
+Preferably 2.15.
+
 ## 9. Security.
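A closing note on the pool-settings refactoring from PATCH 44 above: `BackgroundProcessingPool` no
longer reads Poco configuration itself. Callers construct a `PoolSettings` value (every field has a
default), and `Context::getBackgroundMovePool()` fills it from the `background_move_processing_pool_*`
config keys. A minimal sketch of the new constructor in use — the pool size, the choice of fields to
override, and the include path are assumptions for illustration, not taken from the patches:

    #include <Storages/MergeTree/BackgroundProcessingPool.h>  // path assumed from the repository layout

    void createMovePoolExample()  // hypothetical helper
    {
        DB::BackgroundProcessingPool::PoolSettings settings;  // defaults come from the struct itself
        settings.task_sleep_seconds_when_no_work_min = 10;    // exponential backoff floor
        settings.task_sleep_seconds_when_no_work_max = 600;   // exponential backoff ceiling

        // The constructor logs its creation and starts `size_` worker threads.
        DB::BackgroundProcessingPool pool(16, settings, "BackgroundMovePool", "BgMoveProcPool");
    }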