mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 13:13:36 +00:00
Correct startup logic
This commit is contained in:
parent
d9ebec472b
commit
527325ef46
@ -150,11 +150,9 @@ ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const S
|
||||
return metadata;
|
||||
}
|
||||
|
||||
ReplicatedMergeTreeTableMetadata::Diff
|
||||
ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const
|
||||
{
|
||||
Diff diff;
|
||||
|
||||
void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const
|
||||
{
|
||||
if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
|
||||
{
|
||||
if (date_column != from_zk.date_column)
|
||||
@ -163,10 +161,12 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
else if (!from_zk.date_column.empty())
|
||||
{
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in date index column."
|
||||
" Stored in ZooKeeper: " + from_zk.date_column + ", local is custom-partitioned.",
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (sampling_expression != from_zk.sampling_expression)
|
||||
throw Exception("Existing table metadata in ZooKeeper differs in sample expression."
|
||||
@ -208,63 +208,46 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
|
||||
" Stored in ZooKeeper: " + from_zk.partition_key + ", local: " + partition_key,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
|
||||
}
|
||||
|
||||
void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const
|
||||
{
|
||||
|
||||
checkImmutableFieldsEquals(from_zk);
|
||||
|
||||
if (sorting_key != from_zk.sorting_key)
|
||||
{
|
||||
if (allow_alter)
|
||||
{
|
||||
diff.sorting_key_changed = true;
|
||||
diff.new_sorting_key = from_zk.sorting_key;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in sorting key expression."
|
||||
" Stored in ZooKeeper: " + from_zk.sorting_key + ", local: " + sorting_key,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in sorting key expression."
|
||||
" Stored in ZooKeeper: " + from_zk.sorting_key + ", local: " + sorting_key,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (ttl_table != from_zk.ttl_table)
|
||||
{
|
||||
if (allow_alter)
|
||||
{
|
||||
diff.ttl_table_changed = true;
|
||||
diff.new_ttl_table = from_zk.ttl_table;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in TTL."
|
||||
" Stored in ZooKeeper: " + from_zk.ttl_table +
|
||||
", local: " + ttl_table,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in TTL."
|
||||
" Stored in ZooKeeper: " + from_zk.ttl_table +
|
||||
", local: " + ttl_table,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (skip_indices != from_zk.skip_indices)
|
||||
{
|
||||
if (allow_alter)
|
||||
{
|
||||
diff.skip_indices_changed = true;
|
||||
diff.new_skip_indices = from_zk.skip_indices;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in skip indexes."
|
||||
" Stored in ZooKeeper: " + from_zk.skip_indices +
|
||||
", local: " + skip_indices,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in skip indexes."
|
||||
" Stored in ZooKeeper: " + from_zk.skip_indices +
|
||||
", local: " + skip_indices,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (constraints != from_zk.constraints)
|
||||
{
|
||||
if (allow_alter)
|
||||
{
|
||||
diff.constraints_changed = true;
|
||||
diff.new_constraints = from_zk.constraints;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in constraints."
|
||||
" Stored in ZooKeeper: " + from_zk.constraints +
|
||||
", local: " + constraints,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in constraints."
|
||||
" Stored in ZooKeeper: " + from_zk.constraints +
|
||||
", local: " + constraints,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (from_zk.index_granularity_bytes_found_in_zk && index_granularity_bytes != from_zk.index_granularity_bytes)
|
||||
@ -272,9 +255,41 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
|
||||
" Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity_bytes) +
|
||||
", local: " + DB::toString(index_granularity_bytes),
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
ReplicatedMergeTreeTableMetadata::Diff
|
||||
ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const
|
||||
{
|
||||
|
||||
checkImmutableFieldsEquals(from_zk);
|
||||
|
||||
Diff diff;
|
||||
|
||||
if (sorting_key != from_zk.sorting_key)
|
||||
{
|
||||
diff.sorting_key_changed = true;
|
||||
diff.new_sorting_key = from_zk.sorting_key;
|
||||
}
|
||||
|
||||
if (ttl_table != from_zk.ttl_table)
|
||||
{
|
||||
diff.ttl_table_changed = true;
|
||||
diff.new_ttl_table = from_zk.ttl_table;
|
||||
}
|
||||
|
||||
if (skip_indices != from_zk.skip_indices)
|
||||
{
|
||||
diff.skip_indices_changed = true;
|
||||
diff.new_skip_indices = from_zk.skip_indices;
|
||||
}
|
||||
|
||||
if (constraints != from_zk.constraints)
|
||||
{
|
||||
diff.constraints_changed = true;
|
||||
diff.new_constraints = from_zk.constraints;
|
||||
}
|
||||
|
||||
return diff;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -60,9 +60,14 @@ struct ReplicatedMergeTreeTableMetadata
|
||||
}
|
||||
};
|
||||
|
||||
Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const;
|
||||
void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const;
|
||||
|
||||
Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const;
|
||||
|
||||
private:
|
||||
|
||||
void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const;
|
||||
|
||||
bool index_granularity_bytes_found_in_zk = false;
|
||||
};
|
||||
|
||||
|
@ -286,14 +286,29 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
|
||||
createTableIfNotExists();
|
||||
|
||||
checkTableStructure(false, false);
|
||||
checkTableStructure(zookeeper_path);
|
||||
|
||||
createReplica();
|
||||
|
||||
Coordination::Stat metadata_stat;
|
||||
current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat);
|
||||
metadata_version = metadata_stat.version;
|
||||
}
|
||||
else
|
||||
{
|
||||
checkTableStructure(skip_sanity_checks, true);
|
||||
checkTableStructure(replica_path);
|
||||
checkParts(skip_sanity_checks);
|
||||
|
||||
if (current_zookeeper->exists(replica_path + "/metadata_version"))
|
||||
{
|
||||
metadata_version = parse<int>(current_zookeeper->get(replica_path + "/metadata_version"));
|
||||
}
|
||||
else /// This replica was created by an old version, so we have to take the version of the global metadata node
|
||||
{
|
||||
Coordination::Stat metadata_stat;
|
||||
current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat);
|
||||
metadata_version = metadata_stat.version;
|
||||
}
|
||||
/// Temporary directories contain unfinalized results of Merges or Fetches (after forced restart)
|
||||
/// and don't allow to reinitialize them, so delete each of them immediately
|
||||
clearOldTemporaryDirectories(0);
|
||||
@ -301,9 +316,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
|
||||
createNewZooKeeperNodes();
|
||||
|
||||
Coordination::Stat metadata_stat;
|
||||
current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat);
|
||||
metadata_version = metadata_stat.version;
|
||||
|
||||
other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper();
|
||||
}
|
||||
@ -450,10 +462,6 @@ void StorageReplicatedMergeTree::createTableIfNotExists()
|
||||
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
|
||||
zkutil::CreateMode::Persistent));
|
||||
|
||||
////std::cerr << "Creating alters node\n";
|
||||
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/alters", "",
|
||||
zkutil::CreateMode::Persistent));
|
||||
|
||||
Coordination::Responses responses;
|
||||
auto code = zookeeper->tryMulti(ops, responses);
|
||||
if (code && code != Coordination::ZNODEEXISTS)
|
||||
@ -464,45 +472,23 @@ void StorageReplicatedMergeTree::createTableIfNotExists()
|
||||
/** Verify that the list of columns and table settings match those specified in ZK (/metadata).
|
||||
* If not, throw an exception.
|
||||
*/
|
||||
void StorageReplicatedMergeTree::checkTableStructure(bool skip_sanity_checks, bool allow_alter)
|
||||
void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_prefix)
|
||||
{
|
||||
auto zookeeper = getZooKeeper();
|
||||
|
||||
ReplicatedMergeTreeTableMetadata old_metadata(*this);
|
||||
|
||||
Coordination::Stat metadata_stat;
|
||||
String metadata_str = zookeeper->get(zookeeper_path + "/metadata", &metadata_stat);
|
||||
String metadata_str = zookeeper->get(zookeeper_prefix + "/metadata", &metadata_stat);
|
||||
auto metadata_from_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str);
|
||||
auto metadata_diff = old_metadata.checkAndFindDiff(metadata_from_zk, allow_alter);
|
||||
old_metadata.checkEquals(metadata_from_zk);
|
||||
|
||||
Coordination::Stat columns_stat;
|
||||
auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(zookeeper_path + "/columns", &columns_stat));
|
||||
|
||||
/// TODO(alesap) remove this trash
|
||||
const ColumnsDescription & old_columns = getColumns();
|
||||
if (columns_from_zk != old_columns || !metadata_diff.empty())
|
||||
{
|
||||
if (allow_alter &&
|
||||
(skip_sanity_checks ||
|
||||
old_columns.getOrdinary().sizeOfDifference(columns_from_zk.getOrdinary()) +
|
||||
old_columns.getMaterialized().sizeOfDifference(columns_from_zk.getMaterialized()) <= 2))
|
||||
{
|
||||
LOG_WARNING(log, "Table structure in ZooKeeper is a little different from local table structure. Assuming ALTER.");
|
||||
|
||||
/// We delay setting table structure till startup() because otherwise new table metadata file can
|
||||
/// be overwritten in DatabaseOrdinary::createTable.
|
||||
set_table_structure_at_startup = [columns_from_zk, metadata_diff, this]()
|
||||
{
|
||||
/// Without any locks, because table has not been created yet.
|
||||
setTableStructure(std::move(columns_from_zk), metadata_diff);
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("Table structure in ZooKeeper is too different from local table structure",
|
||||
ErrorCodes::INCOMPATIBLE_COLUMNS);
|
||||
}
|
||||
}
|
||||
if (columns_from_zk != old_columns)
|
||||
throw Exception("Table columns structure in ZooKeeper is different from local table structure", ErrorCodes::INCOMPATIBLE_COLUMNS);
|
||||
}
|
||||
|
||||
|
||||
@ -2934,9 +2920,6 @@ void StorageReplicatedMergeTree::startup()
|
||||
if (is_readonly)
|
||||
return;
|
||||
|
||||
if (set_table_structure_at_startup)
|
||||
set_table_structure_at_startup();
|
||||
|
||||
queue.initialize(
|
||||
zookeeper_path, replica_path,
|
||||
getStorageID().getFullTableName() + " (ReplicatedMergeTreeQueue)",
|
||||
@ -3223,7 +3206,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer
|
||||
|
||||
auto columns_from_entry = ColumnsDescription::parse(entry.columns_str);
|
||||
auto metadata_from_entry = ReplicatedMergeTreeTableMetadata::parse(entry.metadata_str);
|
||||
auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this).checkAndFindDiff(metadata_from_entry, /* allow_alter = */ true);
|
||||
auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this).checkAndFindDiff(metadata_from_entry);
|
||||
|
||||
MergeTreeData::DataParts parts;
|
||||
|
||||
@ -3232,6 +3215,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer
|
||||
requests.emplace_back(zkutil::makeSetRequest(replica_path + "/columns", entry.columns_str, -1));
|
||||
requests.emplace_back(zkutil::makeSetRequest(replica_path + "/metadata", entry.metadata_str, -1));
|
||||
|
||||
|
||||
zookeeper->multi(requests);
|
||||
|
||||
{
|
||||
@ -3245,6 +3229,9 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer
|
||||
LOG_INFO(log, "Applied changes to the metadata of the table. Current metadata version: " << metadata_version);
|
||||
}
|
||||
|
||||
/// This transaction may not happen, but it's ok, because on the next retry we will eventually create this node
|
||||
zookeeper->createOrUpdate(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent);
|
||||
|
||||
recalculateColumnSizes();
|
||||
|
||||
return true;
|
||||
@ -3287,7 +3274,7 @@ void StorageReplicatedMergeTree::alter(
|
||||
|
||||
/// Clear nodes from previous iteration
|
||||
alter_entry.emplace();
|
||||
mutation_znode.emplace();
|
||||
mutation_znode.reset();
|
||||
|
||||
/// We can safely read structure, because we guarded with alter_intention_lock
|
||||
if (is_readonly)
|
||||
@ -3432,7 +3419,7 @@ void StorageReplicatedMergeTree::alter(
|
||||
if (!unwaited.empty())
|
||||
throw Exception("Some replicas doesn't finish metadata alter", ErrorCodes::UNFINISHED);
|
||||
|
||||
if (mutation_znode.has_value() && !mutation_znode->empty())
|
||||
if (mutation_znode)
|
||||
{
|
||||
LOG_DEBUG(log, "Metadata changes applied. Will wait for data changes.");
|
||||
waitMutation(*mutation_znode, query_context.getSettingsRef().replication_alter_partitions_sync);
|
||||
|
@ -221,9 +221,6 @@ private:
|
||||
*/
|
||||
zkutil::EphemeralNodeHolderPtr replica_is_active_node;
|
||||
|
||||
/// Used to delay setting table structure till startup() in case of an offline ALTER.
|
||||
std::function<void()> set_table_structure_at_startup;
|
||||
|
||||
/** Is this replica "leading". The leader replica selects the parts to merge.
|
||||
*/
|
||||
std::atomic<bool> is_leader {false};
|
||||
@ -308,11 +305,7 @@ private:
|
||||
*/
|
||||
void createNewZooKeeperNodes();
|
||||
|
||||
/** Verify that the list of columns and table settings match those specified in ZK (/metadata).
|
||||
* If not, throw an exception.
|
||||
* Must be called before startup().
|
||||
*/
|
||||
void checkTableStructure(bool skip_sanity_checks, bool allow_alter);
|
||||
void checkTableStructure(const String & zookeeper_prefix);
|
||||
|
||||
/// A part of ALTER: apply metadata changes only (data parts are altered separately).
|
||||
/// Must be called under IStorage::lockStructureForAlter() lock.
|
||||
|
Loading…
Reference in New Issue
Block a user