Merge pull request #58023 from canhld94/materialize_non_override_past_values

Materializing an ordinary column with default expression should not override past values
This commit is contained in:
Robert Schulze 2024-02-20 11:12:31 +01:00 committed by GitHub
commit 7e11fc79d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 152 additions and 6 deletions

View File

@ -73,6 +73,7 @@ static void splitAndModifyMutationCommands(
LoggerPtr log) LoggerPtr log)
{ {
auto part_columns = part->getColumnsDescription(); auto part_columns = part->getColumnsDescription();
const auto & table_columns = metadata_snapshot->getColumns();
if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage()))
{ {
@ -81,9 +82,19 @@ static void splitAndModifyMutationCommands(
for (const auto & command : commands) for (const auto & command : commands)
{ {
if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
{
/// For an ordinary column with a DEFAULT expression, MATERIALIZE COLUMN should not override past values.
/// So we only mutate the column if `command.column_name` is not an ordinary column (e.g. it is a MATERIALIZED column), or if the part does not contain the column or its physical column files.
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
{
for_interpreter.push_back(command);
mutated_columns.emplace(command.column_name);
}
}
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
|| command.type == MutationCommand::Type::MATERIALIZE_COLUMN
|| command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
|| command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::MATERIALIZE_TTL
|| command.type == MutationCommand::Type::DELETE || command.type == MutationCommand::Type::DELETE
@ -93,9 +104,6 @@ static void splitAndModifyMutationCommands(
for_interpreter.push_back(command); for_interpreter.push_back(command);
for (const auto & [column_name, expr] : command.column_to_update_expression) for (const auto & [column_name, expr] : command.column_to_update_expression)
mutated_columns.emplace(column_name); mutated_columns.emplace(column_name);
if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
mutated_columns.emplace(command.column_name);
} }
else if (command.type == MutationCommand::Type::DROP_INDEX else if (command.type == MutationCommand::Type::DROP_INDEX
|| command.type == MutationCommand::Type::DROP_PROJECTION || command.type == MutationCommand::Type::DROP_PROJECTION
@ -205,8 +213,15 @@ static void splitAndModifyMutationCommands(
{ {
for (const auto & command : commands) for (const auto & command : commands)
{ {
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
|| command.type == MutationCommand::Type::MATERIALIZE_COLUMN {
/// For an ordinary column with a DEFAULT expression, MATERIALIZE COLUMN should not override past values.
/// So we only mutate the column if `command.column_name` is not an ordinary column (e.g. it is a MATERIALIZED column), or if the part does not contain the column or its physical column files.
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
for_interpreter.push_back(command);
}
else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
|| command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
|| command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::MATERIALIZE_TTL

View File

@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s;
ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2);
SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;
ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear because MATERIALIZE COLUMN won't override past values;
ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MATERIALIZE COLUMN s;
ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3);
SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;

View File

@ -0,0 +1,45 @@
DEFAULT expressions
-- Compact parts
Before materialize
1 1
2 54321
After materialize
1 1
2 54321
-- Wide parts
Before materialize
1 1
2 54321
After materialize
1 1
2 54321
-- Nullable column != physically absent
Before materialize
1 1
2 \N
3 54321
After materialize
1 1
2 \N
3 54321
-- Parts with renamed column
Before materialize
1 1
2 54321
After rename
1 1
2 54321
After materialize
1 1
2 54321
MATERIALIZED expressions
-- Compact parts
Before materialize
1 54321
After materialize
1 65432
-- Compact parts
Before materialize
1 54321
After materialize
1 65432

View File

@ -0,0 +1,85 @@
-- Test setup.
-- NOTE(review): mutations_sync = 2 presumably makes every ALTER ... MATERIALIZE COLUMN below finish
-- before the next SELECT runs — confirm against the ClickHouse settings reference.
SET mutations_sync = 2;
-- Start from a clean state in case an earlier run left the table behind.
DROP TABLE IF EXISTS tab;
-- Tests that existing parts which contain a non-default value in columns with DEFAULT expression remain unchanged by MATERIALIZE COLUMN.
SELECT 'DEFAULT expressions';
-- Case: table created without any part-format setting (the test labels this "Compact parts").
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
-- Row 1 stores an explicit value for dflt; row 2 omits dflt and therefore reads as the DEFAULT (54321).
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
-- Expected: the explicitly inserted value 1 in row 1 is kept, not replaced by 54321.
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
-- Same scenario as the previous case, but with min_bytes_for_wide_part = 1
-- (presumably so that even these tiny inserts are written as wide parts — confirm against the MergeTree settings docs).
SELECT '-- Wide parts';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
-- Row 1 stores an explicit value for dflt; row 2 omits dflt and therefore reads as the DEFAULT (54321).
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
-- Expected: the explicitly inserted value 1 in row 1 is kept, not replaced by 54321.
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
-- An explicitly inserted NULL is a stored value, not a missing column:
-- MATERIALIZE COLUMN must keep it as NULL instead of filling in the DEFAULT.
SELECT '-- Nullable column != physically absent';
CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
-- Three rows: explicit value, explicit NULL, and dflt omitted (reads as the DEFAULT 54321).
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id, dflt) VALUES (2, NULL);
INSERT INTO tab (id) VALUES (3);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
-- Expected: row 1 keeps 1, row 2 keeps NULL, row 3 keeps 54321.
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
-- The column is renamed between the inserts and the MATERIALIZE COLUMN:
-- values written under the old name must still be preserved under the new name.
SELECT '-- Parts with renamed column';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
-- Row 1 stores an explicit value for dflt; row 2 omits dflt and therefore reads as the DEFAULT (54321).
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab RENAME COLUMN dflt TO dflt2;
-- The rename alone must not change any values.
SELECT 'After rename';
SELECT * FROM tab ORDER BY id;
-- Materialize under the new name; expected: row 1 still holds 1.
ALTER TABLE tab MATERIALIZE COLUMN dflt2;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
-- But for columns with MATERIALIZED expression, all existing parts should be rewritten in case a new expression was set in the meantime.
SELECT 'MATERIALIZED expressions';
-- Case: table created without any part-format setting (the test labels this "Compact parts").
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id;
INSERT INTO tab (id) VALUES (1);
SELECT 'Before materialize';
-- mtrl is selected by name (presumably because SELECT * does not return MATERIALIZED columns — confirm).
SELECT id, mtrl FROM tab ORDER BY id;
-- Change the expression first, then materialize: the existing row is expected to switch from 54321 to 65432.
ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
ALTER TABLE tab MATERIALIZE COLUMN mtrl;
SELECT 'After materialize';
SELECT id, mtrl FROM tab ORDER BY id;
DROP TABLE tab;
-- NOTE(review): this section sets min_bytes_for_wide_part = 1, exactly like the '-- Wide parts' section of the
-- DEFAULT tests, so it appears to be the wide-part variant and the label below looks like a copy-paste of the
-- previous section. The literal is left untouched here because the expected-output file contains the same text;
-- if the label is corrected, both files must be changed together.
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
INSERT INTO tab (id) VALUES (1);
SELECT 'Before materialize';
-- mtrl is selected by name (presumably because SELECT * does not return MATERIALIZED columns — confirm).
SELECT id, mtrl FROM tab ORDER BY id;
-- Change the expression first, then materialize: the existing row is expected to switch from 54321 to 65432.
ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
ALTER TABLE tab MATERIALIZE COLUMN mtrl;
SELECT 'After materialize';
SELECT id, mtrl FROM tab ORDER BY id;
DROP TABLE tab;