This commit is contained in:
zwy991114 2024-11-21 14:00:35 +08:00 committed by GitHub
commit 89094c97db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 499 additions and 11 deletions

View File

@ -131,8 +131,8 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table's indices and projections.
## REPLACE PARTITION
@ -151,8 +151,8 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table's indices and projections.
## MOVE PARTITION TO TABLE
@ -166,9 +166,9 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
- Both tables must be the same engine family (replicated or non-replicated).
- The destination table must include all indices and projections from the source table. If the `enforce_index_structure_match_on_partition_manipulation` setting is enabled on the destination table, the indices and projections must be identical. Otherwise, the destination table can have a superset of the source table's indices and projections.
## CLEAR COLUMN IN PARTITION

View File

@ -614,6 +614,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.10",
{
{"enforce_index_structure_match_on_partition_manipulation", true, false, "Add new setting to allow attach when source table's projections and secondary indices is a subset of those in the target table."}
}
},
{"24.9",

View File

@ -818,18 +818,18 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
{
IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext());
if (properties.indices.has(index_desc.name))
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {} is not allowed. Please use different index names.", backQuoteIfNeed(index_desc.name));
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {} is not allowed. Please use a different index name", backQuoteIfNeed(index_desc.name));
const auto & settings = getContext()->getSettingsRef();
if (index_desc.type == FULL_TEXT_INDEX_NAME && !settings[Setting::allow_experimental_full_text_index])
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is disabled. Turn on setting 'allow_experimental_full_text_index'");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The experimental full-text index feature is disabled. Enable the setting 'allow_experimental_full_text_index' to use it");
/// ----
/// Temporary check during a transition period. Please remove at the end of 2024.
if (index_desc.type == INVERTED_INDEX_NAME && !settings[Setting::allow_experimental_inverted_index])
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'");
throw Exception(ErrorCodes::ILLEGAL_INDEX, "The 'inverted' index type is deprecated. Please use the 'full_text' index type instead");
/// ----
if (index_desc.type == "vector_similarity" && !settings[Setting::allow_experimental_vector_similarity_index])
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental vector similarity index is disabled. Turn on setting 'allow_experimental_vector_similarity_index'");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The experimental vector similarity index feature is disabled. Enable the setting 'allow_experimental_vector_similarity_index' to use it");
properties.indices.push_back(index_desc);
}

View File

@ -233,6 +233,7 @@ namespace MergeTreeSetting
extern const MergeTreeSettingsString storage_policy;
extern const MergeTreeSettingsFloat zero_copy_concurrent_part_removal_max_postpone_ratio;
extern const MergeTreeSettingsUInt64 zero_copy_concurrent_part_removal_max_split_times;
extern const MergeTreeSettingsBool enforce_index_structure_match_on_partition_manipulation;
extern const MergeTreeSettingsBool prewarm_mark_cache;
}
@ -7533,10 +7534,11 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour
if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key");
const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions)
const auto check_definitions = [this](const auto & my_descriptions, const auto & src_descriptions)
{
if (my_descriptions.size() != src_descriptions.size())
bool strict_match = (*getSettings())[MergeTreeSetting::enforce_index_structure_match_on_partition_manipulation];
if ((my_descriptions.size() < src_descriptions.size()) ||
(strict_match && my_descriptions.size() != src_descriptions.size()))
return false;
std::unordered_set<std::string> my_query_strings;

View File

@ -100,6 +100,7 @@ namespace ErrorCodes
DECLARE(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \
DECLARE(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \
DECLARE(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \
DECLARE(Bool, enforce_index_structure_match_on_partition_manipulation, false, "If this setting is enabled for destination table of a partition manipulation query (`ATTACH/MOVE/REPLACE PARTITION`), the indices and projections must be identical between the source and destination tables. Otherwise, the destination table can have a superset of the source table's indices and projections.", 0) \
DECLARE(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", EXPERIMENTAL) \
DECLARE(Bool, merge_selector_enable_heuristic_to_remove_small_parts_at_right, true, "Enable heuristic for selecting parts for merge which removes parts from right side of range, if their size is less than specified ratio (0.01) of sum_size. Works for Simple and StochasticSimple merge selectors", 0) \
DECLARE(Float, merge_selector_base, 5.0, "Affects write amplification of assigned merges (expert level setting, don't change if you don't understand what it is doing). Works for Simple and StochasticSimple merge selectors", 0) \

View File

@ -0,0 +1,5 @@
<clickhouse>
    <merge_tree>
        <!-- Strict mode: for ATTACH/MOVE/REPLACE PARTITION the destination table's
             indices and projections must be identical to those of the source table. -->
        <enforce_index_structure_match_on_partition_manipulation>true</enforce_index_structure_match_on_partition_manipulation>
    </merge_tree>
</clickhouse>

View File

@ -0,0 +1,5 @@
<clickhouse>
    <merge_tree>
        <!-- Relaxed mode: for ATTACH/MOVE/REPLACE PARTITION the destination table may
             have a superset of the source table's indices and projections. -->
        <enforce_index_structure_match_on_partition_manipulation>false</enforce_index_structure_match_on_partition_manipulation>
    </merge_tree>
</clickhouse>

View File

@ -0,0 +1,473 @@
import pytest
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)

# node1 is started with the "with check" config. The tests that use it expect
# ATTACH PARTITION to be rejected even when the destination table's indices or
# projections are a strict superset of the source table's.
node1 = cluster.add_instance(
    "node1", main_configs=["configs/config_with_check_table_structure_completely.xml"]
)

# node2 is started with the "without check" config. The tests that use it expect
# ATTACH PARTITION to succeed when the destination table has a superset of the
# source table's indices and projections.
node2 = cluster.add_instance(
    "node2",
    main_configs=["configs/config_without_check_table_structure_completely.xml"],
)
@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once for the whole module and yield it to the tests.

    The cluster is shut down in the ``finally`` clause, so shutdown also runs
    when startup or a test raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
# def test_setting_check_table_structure_completely(start_cluster):
# assert node1.query("""select value from system.merge_tree_settings where name='enforce_index_structure_match_on_partition_manipulation';""") == "0\n"
def test_check_completely_attach_with_different_indices(start_cluster):
    """ATTACH PARTITION on node1 must fail unless secondary indices match exactly.

    node1 is expected to run with strict index/projection matching. Two
    destination tables are tried against the same source table:

    * ``attach_partition_t2`` has an index with the same name but a different type;
    * ``attach_partition_t3`` has the source index plus one extra index.

    Both attaches must be rejected with "Tables have different secondary indices".
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node1.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node1.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);"
        )

        # Same index name, different index type.
        node1.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE bloom_filter GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)

        # Superset of the source indices: still rejected on this node.
        node1.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1,
                INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)
    finally:
        # Always drop the tables: other tests on node1 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node1.query(f"DROP TABLE IF EXISTS {table}")
def test_check_attach_with_different_indices(start_cluster):
    """ATTACH PARTITION on node2 accepts a superset of the source's secondary indices.

    node2 is expected to run with relaxed index/projection matching:

    * ``attach_partition_t2`` redefines the source index with a different type,
      so the attach must fail with "Tables have different secondary indices";
    * ``attach_partition_t3`` contains the source index plus an extra one, so
      the attach must succeed and the attached rows must be queryable through
      both indexed columns.
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node2.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);"
        )

        # Same index name, different index type: not a superset, must fail.
        node2.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE bloom_filter GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node2.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)

        # Source index plus an additional one: allowed on this node.
        node2.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                `c` String,
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1,
                INDEX cf c TYPE bloom_filter GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
        )

        assert node2.query("SELECT COUNT() FROM attach_partition_t3") == "10\n"
        assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `b` = '1'") == "1\n"
        assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `c` = '1'") == "1\n"
    finally:
        # Always drop the tables: other tests on node2 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node2.query(f"DROP TABLE IF EXISTS {table}")
def test_check_completely_attach_with_different_projections(start_cluster):
    """ATTACH PARTITION on node1 must fail unless projections match exactly.

    node1 is expected to run with strict index/projection matching. Two
    destination tables are tried against the same source table:

    * ``attach_partition_t2`` has the same projection query under another name;
    * ``attach_partition_t3`` has the source projection plus one extra projection.

    Both attaches must be rejected with "Tables have different projections".
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node1.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node1.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number) FROM numbers(10);"
        )

        # Identical projection body, different projection name.
        node1.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                PROJECTION differently_named_proj (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a;
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different projections" in str(exc.value)

        # Superset of the source projections: still rejected on this node.
        node1.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                PROJECTION proj2 (
                    SELECT
                        b,
                        avg(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different projections" in str(exc.value)
    finally:
        # Always drop the tables: other tests on node1 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node1.query(f"DROP TABLE IF EXISTS {table}")
def test_check_attach_with_different_projections(start_cluster):
    """ATTACH PARTITION on node2 accepts a superset of the source's projections.

    node2 is expected to run with relaxed index/projection matching:

    * ``attach_partition_t2`` only has a differently named projection, so the
      attach must fail with "Tables have different projections";
    * ``attach_partition_t3`` contains the source projection plus an extra one,
      so the attach must succeed and all 10 rows must be visible.
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node2.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number) FROM numbers(10);"
        )

        # Identical projection body, different projection name: must fail.
        node2.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                PROJECTION differently_named_proj (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a;
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node2.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different projections" in str(exc.value)

        # Source projection plus an additional one: allowed on this node.
        node2.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                PROJECTION proj2 (
                    SELECT
                        b,
                        avg(a)
                    GROUP BY b
                )
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
        )

        assert node2.query("SELECT COUNT() FROM attach_partition_t3") == "10\n"
    finally:
        # Always drop the tables: other tests on node2 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node2.query(f"DROP TABLE IF EXISTS {table}")
def test_check_completely_attach_with_different_indices_and_projections(start_cluster):
    """ATTACH PARTITION on node1 must fail when indices and projections differ.

    node1 is expected to run with strict index/projection matching. The source
    table has one projection and one secondary index:

    * ``attach_partition_t2`` differs in both the projection name and the
      index definitions;
    * ``attach_partition_t3`` has a superset of both the projections and the
      indices of the source table.

    Both attaches are expected to fail with
    "Tables have different secondary indices".
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node1.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node1.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);"
        )

        # Different projection name and a different type for index `bf`.
        node1.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE bloom_filter GRANULARITY 1,
                INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)

        # Superset of both projections and indices: still rejected on this node.
        node1.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                PROJECTION proj2 (
                    SELECT
                        b,
                        avg(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1,
                INDEX cf c TYPE bloom_filter GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node1.query(
                "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)
    finally:
        # Always drop the tables: other tests on node1 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node1.query(f"DROP TABLE IF EXISTS {table}")
def test_check_attach_with_different_indices_and_projections(start_cluster):
    """ATTACH PARTITION on node2 accepts supersets of both indices and projections.

    node2 is expected to run with relaxed index/projection matching. The source
    table has one projection and one secondary index:

    * ``attach_partition_t2`` differs in both the projection name and the type
      of index ``bf``; the attach is expected to fail with
      "Tables have different secondary indices";
    * ``attach_partition_t3`` has a superset of both the projections and the
      indices, so the attach must succeed and the attached rows must be
      queryable through both indexed columns.
    """
    tables = ("attach_partition_t1", "attach_partition_t2", "attach_partition_t3")
    try:
        node2.query(
            """
            CREATE TABLE attach_partition_t1
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "INSERT INTO attach_partition_t1 SELECT number, toString(number), toString(number) FROM numbers(10);"
        )

        # Different projection name and a different type for index `bf`: must fail.
        node2.query(
            """
            CREATE TABLE attach_partition_t2
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE bloom_filter GRANULARITY 1,
                INDEX cf c TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        # serverError 36
        with pytest.raises(QueryRuntimeException) as exc:
            node2.query(
                "ALTER TABLE attach_partition_t2 ATTACH PARTITION tuple() FROM attach_partition_t1;"
            )
        assert "Tables have different secondary indices" in str(exc.value)

        # Superset of both projections and indices: allowed on this node.
        node2.query(
            """
            CREATE TABLE attach_partition_t3
            (
                `a` UInt32,
                `b` String,
                `c` String,
                PROJECTION proj1 (
                    SELECT
                        b,
                        sum(a)
                    GROUP BY b
                ),
                PROJECTION proj2 (
                    SELECT
                        b,
                        avg(a)
                    GROUP BY b
                ),
                INDEX bf b TYPE tokenbf_v1(8192, 3, 0) GRANULARITY 1,
                INDEX cf c TYPE bloom_filter GRANULARITY 1
            )
            ENGINE = MergeTree
            ORDER BY a
            """
        )
        node2.query(
            "ALTER TABLE attach_partition_t3 ATTACH PARTITION tuple() FROM attach_partition_t1;"
        )

        assert node2.query("SELECT COUNT() FROM attach_partition_t3") == "10\n"
        assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `b` = '1'") == "1\n"
        assert node2.query("SELECT `a` FROM attach_partition_t3 WHERE `c` = '1'") == "1\n"
    finally:
        # Always drop the tables: other tests on node2 reuse the same names, so a
        # leaked table would make them fail at CREATE TABLE. IF EXISTS keeps the
        # cleanup from raising when the test failed before a table was created.
        for table in tables:
            node2.query(f"DROP TABLE IF EXISTS {table}")

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2984
personal_ws-1.1 en 2985
AArch
ACLs
ALTERs
@ -2827,6 +2827,7 @@ summapwithoverflow
summingmergetree
sumwithoverflow
superaggregates
superset
supertype
supremum
symlink