diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 11926b2aa08..087458a6646 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -131,8 +131,8 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same indices and projections. - Both tables must have the same storage policy. +- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table’s indices and projections. ## REPLACE PARTITION @@ -151,8 +151,8 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same indices and projections. - Both tables must have the same storage policy. +- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table’s indices and projections. ## MOVE PARTITION TO TABLE @@ -166,9 +166,9 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same indices and projections. 
- Both tables must have the same storage policy. - Both tables must be the same engine family (replicated or non-replicated). +- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table’s indices and projections. ## CLEAR COLUMN IN PARTITION diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index f0d3e001362..97b77a657a7 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -614,6 +614,7 @@ static std::initializer_listclone(), properties.columns, getContext()); if (properties.indices.has(index_desc.name)) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {} is not allowed. Please use different index names.", backQuoteIfNeed(index_desc.name)); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {} is not allowed. Please use a different index name", backQuoteIfNeed(index_desc.name)); const auto & settings = getContext()->getSettingsRef(); if (index_desc.type == FULL_TEXT_INDEX_NAME && !settings[Setting::allow_experimental_full_text_index]) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is disabled. Turn on setting 'allow_experimental_full_text_index'"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The experimental full-text index feature is disabled. Enable the setting 'allow_experimental_full_text_index' to use it"); /// ---- /// Temporary check during a transition period. Please remove at the end of 2024. 
if (index_desc.type == INVERTED_INDEX_NAME && !settings[Setting::allow_experimental_inverted_index]) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'"); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "The 'inverted' index type is deprecated. Please use the 'full_text' index type instead"); /// ---- if (index_desc.type == "vector_similarity" && !settings[Setting::allow_experimental_vector_similarity_index]) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental vector similarity index is disabled. Turn on setting 'allow_experimental_vector_similarity_index'"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The experimental vector similarity index feature is disabled. Enable the setting 'allow_experimental_vector_similarity_index' to use it"); properties.indices.push_back(index_desc); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b9e674a8eec..907fa0bc418 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -233,6 +233,7 @@ namespace MergeTreeSetting extern const MergeTreeSettingsString storage_policy; extern const MergeTreeSettingsFloat zero_copy_concurrent_part_removal_max_postpone_ratio; extern const MergeTreeSettingsUInt64 zero_copy_concurrent_part_removal_max_split_times; + extern const MergeTreeSettingsBool enforce_index_structure_match_on_partition_manipulation; extern const MergeTreeSettingsBool prewarm_mark_cache; } @@ -7533,10 +7534,11 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); - - const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) + const auto check_definitions = [this](const auto & my_descriptions, const auto & 
src_descriptions) { - if (my_descriptions.size() != src_descriptions.size()) + bool strict_match = (*getSettings())[MergeTreeSetting::enforce_index_structure_match_on_partition_manipulation]; + if ((my_descriptions.size() < src_descriptions.size()) || + (strict_match && my_descriptions.size() != src_descriptions.size())) return false; std::unordered_set my_query_strings; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 92e8d880417..9a7a135a407 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -100,6 +100,7 @@ namespace ErrorCodes DECLARE(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ DECLARE(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ DECLARE(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \ + DECLARE(Bool, enforce_index_structure_match_on_partition_manipulation, false, "If this setting is enabled for destination table of a partition manipulation query (`ATTACH/MOVE/REPLACE PARTITION`), the indices and projections must be identical between the source and destination tables. Otherwise, the destination table can have a superset of the source table's indices and projections.", 0) \ DECLARE(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", EXPERIMENTAL) \ DECLARE(Bool, merge_selector_enable_heuristic_to_remove_small_parts_at_right, true, "Enable heuristic for selecting parts for merge which removes parts from right side of range, if their size is less than specified ratio (0.01) of sum_size. 
Works for Simple and StochasticSimple merge selectors", 0) \ DECLARE(Float, merge_selector_base, 5.0, "Affects write amplification of assigned merges (expert level setting, don't change if you don't understand what it is doing). Works for Simple and StochasticSimple merge selectors", 0) \ diff --git a/tests/integration/test_attach_with_different_projections_or_indices/__init__.py b/tests/integration/test_attach_with_different_projections_or_indices/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_with_different_projections_or_indices/configs/config_with_check_table_structure_completely.xml b/tests/integration/test_attach_with_different_projections_or_indices/configs/config_with_check_table_structure_completely.xml new file mode 100644 index 00000000000..06a360847e4 --- /dev/null +++ b/tests/integration/test_attach_with_different_projections_or_indices/configs/config_with_check_table_structure_completely.xml @@ -0,0 +1,5 @@ + + + true + + diff --git a/tests/integration/test_attach_with_different_projections_or_indices/configs/config_without_check_table_structure_completely.xml b/tests/integration/test_attach_with_different_projections_or_indices/configs/config_without_check_table_structure_completely.xml new file mode 100644 index 00000000000..fd78e9d1954 --- /dev/null +++ b/tests/integration/test_attach_with_different_projections_or_indices/configs/config_without_check_table_structure_completely.xml @@ -0,0 +1,5 @@ + + + false + + \ No newline at end of file diff --git a/tests/integration/test_attach_with_different_projections_or_indices/test.py b/tests/integration/test_attach_with_different_projections_or_indices/test.py new file mode 100644 index 00000000000..2db2dc7a7f4 --- /dev/null +++ b/tests/integration/test_attach_with_different_projections_or_indices/test.py @@ -0,0 +1,473 @@ +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster 
= ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", main_configs=["configs/config_with_check_table_structure_completely.xml"] +) +# node1 = cluster.add_instance("node1") +node2 = cluster.add_instance( + "node2", + main_configs=["configs/config_without_check_table_structure_completely.xml"], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# def test_setting_check_table_structure_completely(start_cluster): +# assert node1.query("""select value from system.merge_tree_settings where name='enforce_index_structure_match_on_partition_manipulation';""") == "0\n" +def test_check_completely_attach_with_different_indices(start_cluster): + node1.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node1.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);" + ) + node1.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary indices" in str(exc.value) + node1.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1, + INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary 
indices" in str(exc.value) + node1.query("DROP TABLE attach_partition_t1") + node1.query("DROP TABLE attach_partition_t2") + node1.query("DROP TABLE attach_partition_t3") + + +def test_check_attach_with_different_indices(start_cluster): + node2.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);" + ) + node2.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node2.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary indices" in str(exc.value) + node2.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + `c` String, + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1, + INDEX cf c TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert node2.query("SELECT COUNT() FROM attach_partition_t3") == "10\n" + assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `b` = '1'") == "1\n" + assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `c` = '1'") == "1\n" + node2.query("DROP TABLE attach_partition_t1") + node2.query("DROP TABLE attach_partition_t2") + node2.query("DROP TABLE attach_partition_t3") + + +def test_check_completely_attach_with_different_projections(start_cluster): + node1.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ) + ) + ENGINE = 
MergeTree + ORDER BY a + """ + ) + node1.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number) FROM numbers(10);" + ) + node1.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + PROJECTION differently_named_proj ( + SELECT + b, + sum(a) + GROUP BY b + ) + ) + ENGINE = MergeTree + ORDER BY a; + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different projections" in str(exc.value) + node1.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + PROJECTION proj2 ( + SELECT + b, + avg(a) + GROUP BY b + ) + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different projections" in str(exc.value) + node1.query("DROP TABLE attach_partition_t1") + node1.query("DROP TABLE attach_partition_t2") + node1.query("DROP TABLE attach_partition_t3") + + +def test_check_attach_with_different_projections(start_cluster): + node2.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ) + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number) FROM numbers(10);" + ) + node2.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + PROJECTION differently_named_proj ( + SELECT + b, + sum(a) + GROUP BY b + ) + ) + ENGINE = MergeTree + ORDER BY a; + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node2.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) 
+ assert "Tables have different projections" in str(exc.value) + node2.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + PROJECTION proj2 ( + SELECT + b, + avg(a) + GROUP BY b + ) + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert node2.query("SELECT COUNT() FROM attach_partition_t3") == "10\n" + node2.query("DROP TABLE attach_partition_t1") + node2.query("DROP TABLE attach_partition_t2") + node2.query("DROP TABLE attach_partition_t3") + + +def test_check_completely_attach_with_different_indices_and_projections(start_cluster): + node1.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node1.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);" + ) + node1.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj ( + SELECT + b, + sum(a) + GROUP BY b + ), + INDEX bf b TYPE bloom_filter GRANULARITY 1, + INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary indices" in str(exc.value) + node1.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + PROJECTION proj2 ( + SELECT + b, + avg(a) + GROUP BY b + ), + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1, + INDEX cf c TYPE bloom_filter 
GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node1.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary indices" in str(exc.value) + node1.query("DROP TABLE attach_partition_t1") + node1.query("DROP TABLE attach_partition_t2") + node1.query("DROP TABLE attach_partition_t3") + + +def test_check_attach_with_different_indices_and_projections(start_cluster): + node2.query( + """ + CREATE TABLE attach_partition_t1 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);" + ) + node2.query( + """ + CREATE TABLE attach_partition_t2 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj ( + SELECT + b, + sum(a) + GROUP BY b + ), + INDEX bf b TYPE bloom_filter GRANULARITY 1, + INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + # serverError 36 + with pytest.raises(QueryRuntimeException) as exc: + node2.query( + "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert "Tables have different secondary indices" in str(exc.value) + node2.query( + """ + CREATE TABLE attach_partition_t3 + ( + `a` UInt32, + `b` String, + `c` String, + PROJECTION proj1 ( + SELECT + b, + sum(a) + GROUP BY b + ), + PROJECTION proj2 ( + SELECT + b, + avg(a) + GROUP BY b + ), + INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1, + INDEX cf c TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree + ORDER BY a + """ + ) + node2.query( + "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;" + ) + assert node2.query("SELECT COUNT() 
FROM attach_partition_t3") == "10\n" + assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `b` = '1'") == "1\n" + assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `c` = '1'") == "1\n" + node2.query("DROP TABLE attach_partition_t1") + node2.query("DROP TABLE attach_partition_t2") + node2.query("DROP TABLE attach_partition_t3") diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 03e4d149a8d..302ee115a98 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2984 +personal_ws-1.1 en 2985 AArch ACLs ALTERs @@ -2827,6 +2827,7 @@ summapwithoverflow summingmergetree sumwithoverflow superaggregates +superset supertype supremum symlink