Fix mutations memory consumption finally

This commit is contained in:
alesapin 2020-03-25 13:52:32 +03:00
parent aa61bc1954
commit 83b2103fd0
3 changed files with 101 additions and 27 deletions

View File

@ -620,39 +620,24 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
}
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
}
auto metadata = storage->getInMemoryMetadata();
/// We have to execute the select in primary key order
/// because we don't sort the results additionally and don't have
/// any guarantees on data order without ORDER BY. It's almost free, because we
/// have an optimization for reading data in primary key order.
if (metadata.order_by_ast)
if (ASTPtr key_expr = storage->getSortingKeyAST(); key_expr && !key_expr->children.empty())
{
ASTPtr dummy;
ASTPtr key_expr;
if (metadata.primary_key_ast)
key_expr = metadata.primary_key_ast;
else
key_expr = metadata.order_by_ast;
bool empty = false;
/// In all other cases we cannot have empty key
if (auto key_function = key_expr->as<ASTFunction>())
empty = key_function->arguments->children.empty();
/// Not an explicitly specified empty key
if (!empty)
auto res = std::make_shared<ASTExpressionList>();
for (const auto & key_part : key_expr->children)
{
auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);
order_by_expr->children.push_back(key_part);
order_by_expr->children.push_back(key_expr);
auto res = std::make_shared<ASTExpressionList>();
res->children.push_back(order_by_expr);
}
select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
}
}
return select;
}

View File

@ -1,6 +1,6 @@
DROP TABLE IF EXISTS table_with_pk;
DROP TABLE IF EXISTS table_with_single_pk;
CREATE TABLE table_with_pk
CREATE TABLE table_with_single_pk
(
key UInt8,
value String
@ -8,9 +8,9 @@ CREATE TABLE table_with_pk
ENGINE = MergeTree
ORDER BY key;
INSERT INTO table_with_pk SELECT number, toString(number % 10) FROM numbers(10000000);
INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000);
ALTER TABLE table_with_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;
ALTER TABLE table_with_single_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;
SYSTEM FLUSH LOGS;
@ -20,6 +20,92 @@ SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_pk' AND database = currentDatabase();
WHERE event_type = 'MutatePart' AND table = 'table_with_single_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_pk;
DROP TABLE IF EXISTS table_with_single_pk;
-- Scenario: the sorting key is a tuple of several plain columns (key1, key2, key3).
DROP TABLE IF EXISTS table_with_multi_pk;
CREATE TABLE table_with_multi_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY (key1, key2, key3);
-- 10M rows: key1 cycles through 0..31, key2 is the row number,
-- key3 is the same constant timestamp for every row.
INSERT INTO table_with_multi_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
-- key1 never exceeds 31, so `key1 % 77 = 0` matches only the rows with key1 = 0.
-- mutations_sync = 1 makes the ALTER wait until the mutation has completed.
ALTER TABLE table_with_multi_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
-- Flush buffered log entries so that system.part_log can be queried below.
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and must not
-- exceed read_bytes. DISTINCT collapses the per-part results, so a single
-- output row means every MutatePart event gave the same comparison result.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_multi_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_multi_pk;
-- Scenario: the sorting key contains an expression over columns
-- (arithmetic plus a cast), not only plain column references.
DROP TABLE IF EXISTS table_with_function_pk;
CREATE TABLE table_with_function_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY (key1 + key2 + cast(value as UInt64), key2);
-- 10M rows: key1 cycles through 0..31, key2 is the row number, and value is
-- a single decimal digit as a string (the key expression casts it to UInt64).
INSERT INTO table_with_function_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
-- key1 never exceeds 31, so `key1 % 77 = 0` matches only the rows with key1 = 0.
-- mutations_sync = 1 makes the ALTER wait until the mutation has completed.
ALTER TABLE table_with_function_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
-- Flush buffered log entries so that system.part_log can be queried below.
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and must not
-- exceed read_bytes. DISTINCT collapses the per-part results, so a single
-- output row means every MutatePart event gave the same comparison result.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_function_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_function_pk;
-- Scenario: the table is declared with an empty sorting key (ORDER BY tuple()).
DROP TABLE IF EXISTS table_without_pk;
CREATE TABLE table_without_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY tuple();
-- 10M rows: key1 cycles through 0..31, key2 is the row number,
-- key3 is the same constant timestamp for every row.
INSERT INTO table_without_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
-- key1 never exceeds 31, so `key1 % 77 = 0` matches only the rows with key1 = 0.
-- mutations_sync = 1 makes the ALTER wait until the mutation has completed.
ALTER TABLE table_without_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
-- Flush buffered log entries so that system.part_log can be queried below.
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and must not
-- exceed read_bytes. DISTINCT collapses the per-part results, so a single
-- output row means every MutatePart event gave the same comparison result.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_without_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_without_pk;