From 6672904cb03e5e779fc17c4ef9f39714875ef3e4 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 13 Nov 2024 10:01:51 -0800 Subject: [PATCH 1/7] Fix bug with materializing a column in the sort key by throwing an exception --- src/Interpreters/MutationsInterpreter.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 0f25d5ac21c..e3bf536b9bf 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -790,6 +790,12 @@ void MutationsInterpreter::prepare(bool dry_run) if (stages.size() == 1) /// First stage only supports filtering and can't update columns. stages.emplace_back(context); + // Can't materialize a column in the sort key + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); + if (std::find(sort_columns.begin(), sort_columns.end(), command.column_name) != sort_columns.end()) { + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Failed to materialize column {} because it's in the sort key. Doing so would break sort order", command.column_name); + } + const auto & column = columns_desc.get(command.column_name); if (!column.default_desc.expression) From c70a7f7573fb2346110852436a302dd93c9edb55 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 13 Nov 2024 10:03:59 -0800 Subject: [PATCH 2/7] Add test 03263_forbid_materialize_sort_key --- .../03263_forbid_materialize_sort_key.reference | 0 .../0_stateless/03263_forbid_materialize_sort_key.sql | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/03263_forbid_materialize_sort_key.reference create mode 100644 tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql diff --git a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.reference b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql new file mode 100644 index 00000000000..ed76c8d59ce --- /dev/null +++ b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql @@ -0,0 +1,9 @@ +CREATE TABLE IF NOT EXISTS test (a UInt64) ENGINE=MergeTree() ORDER BY a; + +INSERT INTO test (a) SELECT 1 FROM numbers(1000); + +ALTER TABLE test ADD COLUMN b Float64 AFTER a, MODIFY ORDER BY (a, b); +ALTER TABLE test MODIFY COLUMN b DEFAULT rand64() % 100000; +ALTER TABLE test MATERIALIZE COLUMN b; -- { serverError CANNOT_UPDATE_COLUMN } + +DROP TABLE test; \ No newline at end of file From 7166df0776fe857a03c95cb69c1502d41b789b72 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 13 Nov 2024 10:12:53 -0800 Subject: [PATCH 3/7] Update docs to explicitly mention that it's an invalid operation --- docs/en/sql-reference/statements/alter/column.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 29df041ccc6..45829ea3d8f 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -272,14 +272,14 @@ ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_bloc ## MATERIALIZE COLUMN -Materializes a column with a `DEFAULT` or `MATERIALIZED` value expression. When adding a materialized column using `ALTER TABLE table_name ADD COLUMN column_name MATERIALIZED`, existing rows without materialized values are not automatically filled. `MATERIALIZE COLUMN` statement can be used to rewrite existing column data after a `DEFAULT` or `MATERIALIZED` expression has been added or updated (which only updates the metadata but does not change existing data). +Materializes a column with a `DEFAULT` or `MATERIALIZED` value expression. When adding a materialized column using `ALTER TABLE table_name ADD COLUMN column_name MATERIALIZED`, existing rows without materialized values are not automatically filled. `MATERIALIZE COLUMN` statement can be used to rewrite existing column data after a `DEFAULT` or `MATERIALIZED` expression has been added or updated (which only updates the metadata but does not change existing data). Note that materializing a column in the sort key is an invalid operation because it would break the sort order. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). For columns with a new or updated `MATERIALIZED` value expression, all existing rows are rewritten. For columns with a new or updated `DEFAULT` value expression, the behavior depends on the ClickHouse version: - In ClickHouse < v24.2, all existing rows are rewritten. -- ClickHouse >= v24.2 distinguishes if a row value in a column with `DEFAULT` value expression was explicitly specified when it was inserted, or not, i.e. calculated from the `DEFAULT` value expression. If the value was explicitly specified, ClickHouse keeps it as is. If the value was was calculated, ClickHouse changes it to the new or updated `MATERIALIZED` value expression. +- ClickHouse >= v24.2 distinguishes if a row value in a column with `DEFAULT` value expression was explicitly specified when it was inserted, or not, i.e. calculated from the `DEFAULT` value expression. If the value was explicitly specified, ClickHouse keeps it as is. If the value was calculated, ClickHouse changes it to the new or updated `MATERIALIZED` value expression. Syntax: From e29ee5b96d5d755b0d57292d5d24031f37a26b2f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 13 Nov 2024 10:31:24 -0800 Subject: [PATCH 4/7] Fix style in src code (bracket) --- src/Interpreters/MutationsInterpreter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index e3bf536b9bf..0fac9d01a38 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -792,7 +792,8 @@ void MutationsInterpreter::prepare(bool dry_run) // Can't materialize a column in the sort key Names sort_columns = metadata_snapshot->getSortingKeyColumns(); - if (std::find(sort_columns.begin(), sort_columns.end(), command.column_name) != sort_columns.end()) { + if (std::find(sort_columns.begin(), sort_columns.end(), command.column_name) != sort_columns.end()) + { throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Failed to materialize column {} because it's in the sort key. Doing so would break sort order", command.column_name); } From 05fe163898fefa465d15ca0f581754c61e7ed5fc Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Wed, 13 Nov 2024 11:37:31 -0800 Subject: [PATCH 5/7] Add additional test case from issue #53421 --- .../03263_forbid_materialize_sort_key.sql | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql index ed76c8d59ce..3bd46ae1ca0 100644 --- a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql +++ b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql @@ -6,4 +6,15 @@ ALTER TABLE test ADD COLUMN b Float64 AFTER a, MODIFY ORDER BY (a, b); ALTER TABLE test MODIFY COLUMN b DEFAULT rand64() % 100000; ALTER TABLE test MATERIALIZE COLUMN b; -- { serverError CANNOT_UPDATE_COLUMN } -DROP TABLE test; \ No newline at end of file + +CREATE TABLE IF NOT EXISTS tab (x UInt32, y UInt32) engine = MergeTree ORDER BY tuple(); +CREATE DICTIONARY IF NOT EXISTS dict (x UInt32, y UInt32) primary key x source(clickhouse(table 'tab')) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +INSERT INTO tab values (1, 2), (3, 4); +SYSTEM RELOAD DICTIONARY dict; +CREATE TABLE IF NOT EXISTS tab2 (x UInt32, y UInt32 materialized dictGet(dict, 'y', x)) engine = MergeTree order by (y); +INSERT INTO tab2 (x) values (1), (3); +TRUNCATE TABLE tab; +INSERT INTO tab values (1, 4), (3, 2); +SYSTEM RELOAD DICTIONARY dict; +SET mutations_sync=2; +ALTER TABLE tab2 materialize column y; -- { serverError CANNOT_UPDATE_COLUMN } From 8f44818d99a42b2a07ff2f70ab9d3e25cf78dce6 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 16 Nov 2024 23:56:03 -0800 Subject: [PATCH 6/7] Empty commit (just wanna see what happens) From 05d23dc932ab1da05030cd0d82de5e593073b084 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 17 Nov 2024 09:23:09 -0800 Subject: [PATCH 7/7] Add DROP statements to test --- .../03263_forbid_materialize_sort_key.sql | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql index 3bd46ae1ca0..a27be98e0aa 100644 --- a/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql +++ b/tests/queries/0_stateless/03263_forbid_materialize_sort_key.sql @@ -5,16 +5,20 @@ INSERT INTO test (a) SELECT 1 FROM numbers(1000); ALTER TABLE test ADD COLUMN b Float64 AFTER a, MODIFY ORDER BY (a, b); ALTER TABLE test MODIFY COLUMN b DEFAULT rand64() % 100000; ALTER TABLE test MATERIALIZE COLUMN b; -- { serverError CANNOT_UPDATE_COLUMN } +DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS tab (x UInt32, y UInt32) engine = MergeTree ORDER BY tuple(); CREATE DICTIONARY IF NOT EXISTS dict (x UInt32, y UInt32) primary key x source(clickhouse(table 'tab')) LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -INSERT INTO tab values (1, 2), (3, 4); +INSERT INTO tab VALUES (1, 2), (3, 4); SYSTEM RELOAD DICTIONARY dict; -CREATE TABLE IF NOT EXISTS tab2 (x UInt32, y UInt32 materialized dictGet(dict, 'y', x)) engine = MergeTree order by (y); -INSERT INTO tab2 (x) values (1), (3); +CREATE TABLE IF NOT EXISTS tab2 (x UInt32, y UInt32 materialized dictGet(dict, 'y', x)) engine = MergeTree ORDER BY (y); +INSERT INTO tab2 (x) VALUES (1), (3); TRUNCATE TABLE tab; -INSERT INTO tab values (1, 4), (3, 2); +INSERT INTO tab VALUES (1, 4), (3, 2); SYSTEM RELOAD DICTIONARY dict; SET mutations_sync=2; ALTER TABLE tab2 materialize column y; -- { serverError CANNOT_UPDATE_COLUMN } +DROP TABLE IF EXISTS tab2; +DROP DICTIONARY IF EXISTS dict; +DROP TABLE IF EXISTS tab;