From 78d7bdfacb304493dabb975ac03294c19b835b2e Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Wed, 27 Nov 2024 23:08:13 +0000
Subject: [PATCH] Backport #70297 to 24.9: move `fixReplicaMetadataVersionIfNeeded`
 from attach thread to restarting thread

---
 .../ReplicatedMergeTreeAttachThread.cpp          | 90 +-----------------
 .../ReplicatedMergeTreeAttachThread.h            |  2 -
 .../MergeTree/ReplicatedMergeTreeQueue.cpp       |  2 +-
 .../MergeTree/ReplicatedMergeTreeQueue.h         |  1 +
 .../ReplicatedMergeTreeRestartingThread.cpp      | 92 +++++++++++++++++++
 .../ReplicatedMergeTreeRestartingThread.h        |  4 +
 tests/integration/helpers/cluster.py             | 50 +++++-----
 tests/integration/parallel_skip.json             | 14 ++-
 .../test_fix_metadata_version/__init__.py        |  0
 .../test_fix_metadata_version/configs/config.xml | 16 ++++
 .../test_fix_metadata_version/test.py            | 73 +++++++++++++++
 11 files changed, 227 insertions(+), 117 deletions(-)
 create mode 100644 tests/integration/test_fix_metadata_version/__init__.py
 create mode 100644 tests/integration/test_fix_metadata_version/configs/config.xml
 create mode 100644 tests/integration/test_fix_metadata_version/test.py

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp
index 67570d78366..1e3571cd3ff 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include
 #include
 
@@ -15,7 +16,6 @@ namespace ErrorCodes
 {
     extern const int SUPPORT_IS_DISABLED;
     extern const int REPLICA_STATUS_CHANGED;
-    extern const int LOGICAL_ERROR;
 }
 
 ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_)
@@ -118,67 +118,6 @@ void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const z
     }
 }
 
-Int32 ReplicatedMergeTreeAttachThread::fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper)
-{
-    const String & zookeeper_path = storage.zookeeper_path;
-    const String & replica_path = storage.replica_path;
-    const bool replica_readonly = storage.is_readonly;
-
-    for (size_t i = 0; i != 2; ++i)
-    {
-        String replica_metadata_version_str;
-        const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version_str);
-        if (!replica_metadata_version_exists)
-            return -1;
-
-        const Int32 metadata_version = parse<Int32>(replica_metadata_version_str);
-
-        if (metadata_version != 0 || replica_readonly)
-        {
-            /// No need to fix anything
-            return metadata_version;
-        }
-
-        Coordination::Stat stat;
-        zookeeper->get(fs::path(zookeeper_path) / "metadata", &stat);
-        if (stat.version == 0)
-        {
-            /// No need to fix anything
-            return metadata_version;
-        }
-
-        ReplicatedMergeTreeQueue & queue = storage.queue;
-        queue.pullLogsToQueue(zookeeper);
-        if (queue.getStatus().metadata_alters_in_queue != 0)
-        {
-            LOG_DEBUG(log, "No need to update metadata_version as there are ALTER_METADATA entries in the queue");
-            return metadata_version;
-        }
-
-        const Coordination::Requests ops = {
-            zkutil::makeSetRequest(fs::path(replica_path) / "metadata_version", std::to_string(stat.version), 0),
-            zkutil::makeCheckRequest(fs::path(zookeeper_path) / "metadata", stat.version),
-        };
-        Coordination::Responses ops_responses;
-        const auto code = zookeeper->tryMulti(ops, ops_responses);
-        if (code == Coordination::Error::ZOK)
-        {
-            LOG_DEBUG(log, "Successfully set metadata_version to {}", stat.version);
-            return stat.version;
-        }
-        if (code != Coordination::Error::ZBADVERSION)
-        {
-            throw zkutil::KeeperException(code);
-        }
-    }
-
-    /// Second attempt is only possible if metadata_version != 0 or metadata.version changed during the first attempt.
-    /// If metadata_version != 0, on second attempt we will return the new metadata_version.
-    /// If metadata.version changed, on second attempt we will either get metadata_version != 0 and return the new metadata_version or we will get metadata_alters_in_queue != 0 and return 0.
-    /// Either way, on second attempt this method should return.
-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to fix replica metadata_version in ZooKeeper after two attempts");
-}
-
 void ReplicatedMergeTreeAttachThread::runImpl()
 {
     storage.setZooKeeper();
@@ -222,33 +161,6 @@ void ReplicatedMergeTreeAttachThread::runImpl()
     /// Just in case it was not removed earlier due to connection loss
     zookeeper->tryRemove(replica_path + "/flags/force_restore_data");
 
-    const Int32 replica_metadata_version = fixReplicaMetadataVersionIfNeeded(zookeeper);
-    const bool replica_metadata_version_exists = replica_metadata_version != -1;
-    if (replica_metadata_version_exists)
-    {
-        storage.setInMemoryMetadata(metadata_snapshot->withMetadataVersion(replica_metadata_version));
-    }
-    else
-    {
-        /// Table was created before 20.4 and was never altered,
-        /// let's initialize replica metadata version from global metadata version.
-        Coordination::Stat table_metadata_version_stat;
-        zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat);
-
-        Coordination::Requests ops;
-        ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version));
-        ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent));
-
-        Coordination::Responses res;
-        auto code = zookeeper->tryMulti(ops, res);
-
-        if (code == Coordination::Error::ZBADVERSION)
-            throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version "
-                "because table was concurrently altered, will retry");
-
-        zkutil::KeeperMultiException::check(code, ops, res);
-    }
-
     storage.checkTableStructure(replica_path, metadata_snapshot);
     storage.checkParts(skip_sanity_checks);
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
index bfc97442598..250a5ed34d1 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
@@ -48,8 +48,6 @@ private:
     void runImpl();
 
     void finalizeInitialization();
-
-    Int32 fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper);
 };
 
 }
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index 0fa2be6a389..8e8c1fd592d 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -607,7 +607,7 @@ std::pair ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::Zo
 {
     std::lock_guard lock(pull_logs_to_queue_mutex);
 
-    if (reason != LOAD)
+    if (reason != LOAD && reason != FIX_METADATA_VERSION)
     {
         /// It's totally ok to load queue on readonly replica (that's what RestartingThread does on initialization).
         /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired).
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
index 9d3349663e2..6ec8818b0c6 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
@@ -334,6 +334,7 @@ public:
         UPDATE,
         MERGE_PREDICATE,
         SYNC,
+        FIX_METADATA_VERSION,
         OTHER,
     };
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
index dec1dd0bee9..9ca09372f21 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
@@ -24,6 +24,8 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int REPLICA_IS_ALREADY_ACTIVE;
+    extern const int REPLICA_STATUS_CHANGED;
+    extern const int LOGICAL_ERROR;
 }
 
 namespace FailPoints
@@ -202,6 +204,36 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
         throw;
     }
 
+    const Int32 replica_metadata_version = fixReplicaMetadataVersionIfNeeded(zookeeper);
+    const bool replica_metadata_version_exists = replica_metadata_version != -1;
+    if (replica_metadata_version_exists)
+    {
+        storage.setInMemoryMetadata(storage.getInMemoryMetadataPtr()->withMetadataVersion(replica_metadata_version));
+    }
+    else
+    {
+        /// Table was created before 20.4 and was never altered,
+        /// let's initialize replica metadata version from global metadata version.
+
+        const String & zookeeper_path = storage.zookeeper_path, & replica_path = storage.replica_path;
+
+        Coordination::Stat table_metadata_version_stat;
+        zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat);
+
+        Coordination::Requests ops;
+        ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version));
+        ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent));
+
+        Coordination::Responses res;
+        auto code = zookeeper->tryMulti(ops, res);
+
+        if (code == Coordination::Error::ZBADVERSION)
+            throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version "
+                "because table was concurrently altered, will retry");
+
+        zkutil::KeeperMultiException::check(code, ops, res);
+    }
+
     storage.queue.removeCurrentPartsFromMutations();
     storage.last_queue_update_finish_time.store(time(nullptr));
 
@@ -419,4 +451,64 @@ void ReplicatedMergeTreeRestartingThread::setNotReadonly()
     storage.readonly_start_time.store(0, std::memory_order_relaxed);
 }
 
+
+Int32 ReplicatedMergeTreeRestartingThread::fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper)
+{
+    const String & zookeeper_path = storage.zookeeper_path;
+    const String & replica_path = storage.replica_path;
+
+    const size_t num_attempts = 2;
+    for (size_t attempt = 0; attempt != num_attempts; ++attempt)
+    {
+        String replica_metadata_version_str;
+        Coordination::Stat replica_stat;
+        const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version_str, &replica_stat);
+        if (!replica_metadata_version_exists)
+            return -1;
+
+        const Int32 metadata_version = parse<Int32>(replica_metadata_version_str);
+        if (metadata_version != 0)
+            return metadata_version;
+
+        Coordination::Stat table_stat;
+        zookeeper->get(fs::path(zookeeper_path) / "metadata", &table_stat);
+        if (table_stat.version == 0)
+            return metadata_version;
+
+        ReplicatedMergeTreeQueue & queue = storage.queue;
+        queue.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::FIX_METADATA_VERSION);
+        if (queue.getStatus().metadata_alters_in_queue != 0)
+        {
+            LOG_INFO(log, "Skipping updating metadata_version as there are ALTER_METADATA entries in the queue");
+            return metadata_version;
+        }
+
+        const Coordination::Requests ops = {
+            zkutil::makeSetRequest(fs::path(replica_path) / "metadata_version", std::to_string(table_stat.version), replica_stat.version),
+            zkutil::makeCheckRequest(fs::path(zookeeper_path) / "metadata", table_stat.version),
+        };
+        Coordination::Responses ops_responses;
+        const Coordination::Error code = zookeeper->tryMulti(ops, ops_responses);
+        if (code == Coordination::Error::ZOK)
+        {
+            LOG_DEBUG(log, "Successfully set metadata_version to {}", table_stat.version);
+            return table_stat.version;
+        }
+
+        if (code == Coordination::Error::ZBADVERSION)
+        {
+            LOG_WARNING(log, "Cannot fix metadata_version because either metadata.version or metadata_version.version changed, attempts left = {}", num_attempts - attempt - 1);
+            continue;
+        }
+
+        throw zkutil::KeeperException(code);
+    }
+
+    /// Second attempt is only possible if either metadata_version.version or metadata.version changed during the first attempt.
+    /// If metadata_version changed to non-zero value during the first attempt, on second attempt we will return the new metadata_version.
+    /// If metadata.version changed during first attempt, on second attempt we will either get metadata_version != 0 and return the new metadata_version or we will get metadata_alters_in_queue != 0 and return 0.
+    /// So either first or second attempt should return unless metadata_version was rewritten from 0 to 0 during the first attempt which is highly unlikely.
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to fix replica metadata_version in ZooKeeper after two attempts");
+}
+
 }
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
index d719505ae5e..6f450dc1d40 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 
 
 namespace DB
@@ -68,6 +69,9 @@ private:
 
     /// Disable readonly mode for table
     void setNotReadonly();
+
+    /// Fix replica metadata_version if needed
+    Int32 fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper);
 };
 
 
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index ee0a75753d2..08b557f53ea 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -20,6 +20,7 @@ import shlex
 import urllib3
 import requests
 from pathlib import Path
+from typing import List
 
 try:
     # Please, add modules that required for specific tests only here.
@@ -81,6 +82,8 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo
 # This means that this minimum need to be, at least, 1 year older than the current release
 CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3"
 
+ZOOKEEPER_CONTAINERS = ("zoo1", "zoo2", "zoo3")
+
 
 # to create docker-compose env file
 def _create_env_file(path, variables):
@@ -2163,6 +2166,11 @@ class ClickHouseCluster:
         container_id = self.get_container_id(instance_name)
         return self.docker_client.api.logs(container_id).decode()
 
+    def query_zookeeper(self, query, node=ZOOKEEPER_CONTAINERS[0], nothrow=False):
+        cmd = f'clickhouse keeper-client -p {self.zookeeper_port} -q "{query}"'
+        container_id = self.get_container_id(node)
+        return self.exec_in_container(container_id, cmd, nothrow=nothrow, use_cli=False)
+
     def exec_in_container(
         self, container_id, cmd, detach=False, nothrow=False, use_cli=True, **kwargs
     ):
@@ -2495,10 +2503,21 @@ class ClickHouseCluster:
 
     def wait_zookeeper_secure_to_start(self, timeout=20):
         logging.debug("Wait ZooKeeper Secure to start")
+        self.wait_zookeeper_nodes_to_start(ZOOKEEPER_CONTAINERS, timeout)
+
+    def wait_zookeeper_to_start(self, timeout: float = 180) -> None:
+        logging.debug("Wait ZooKeeper to start")
+        self.wait_zookeeper_nodes_to_start(ZOOKEEPER_CONTAINERS, timeout)
+
+    def wait_zookeeper_nodes_to_start(
+        self,
+        nodes: List[str],
+        timeout: float = 60,
+    ) -> None:
         start = time.time()
         while time.time() - start < timeout:
             try:
-                for instance in ["zoo1", "zoo2", "zoo3"]:
+                for instance in nodes:
                     conn = self.get_kazoo_client(instance)
                     conn.get_children("/")
                     conn.stop()
@@ -2510,25 +2529,6 @@ class ClickHouseCluster:
 
         raise Exception("Cannot wait ZooKeeper secure container")
 
-    def wait_zookeeper_to_start(self, timeout=180):
-        logging.debug("Wait ZooKeeper to start")
-        start = time.time()
-        while time.time() - start < timeout:
-            try:
-                for instance in ["zoo1", "zoo2", "zoo3"]:
-                    conn = self.get_kazoo_client(instance)
-                    conn.get_children("/")
-                    conn.stop()
-                logging.debug("All instances of ZooKeeper started")
-                return
-            except Exception as ex:
-                logging.debug(f"Can't connect to ZooKeeper {instance}: {ex}")
-                time.sleep(0.5)
-
-        raise Exception(
-            "Cannot wait ZooKeeper container (probably it's a `iptables-nft` issue, you may try to `sudo iptables -P FORWARD ACCEPT`)"
-        )
-
     def make_hdfs_api(self, timeout=180, kerberized=False):
         if kerberized:
             keytab = p.abspath(
@@ -3329,7 +3329,11 @@ class ClickHouseCluster:
         return zk
 
     def run_kazoo_commands_with_retries(
-        self, kazoo_callback, zoo_instance_name="zoo1", repeats=1, sleep_for=1
+        self,
+        kazoo_callback,
+        zoo_instance_name=ZOOKEEPER_CONTAINERS[0],
+        repeats=1,
+        sleep_for=1,
     ):
         zk = self.get_kazoo_client(zoo_instance_name)
         logging.debug(
@@ -4737,9 +4741,7 @@ class ClickHouseInstance:
             depends_on.append("nats1")
 
         if self.with_zookeeper:
-            depends_on.append("zoo1")
-            depends_on.append("zoo2")
-            depends_on.append("zoo3")
+            depends_on += list(ZOOKEEPER_CONTAINERS)
 
         if self.with_minio:
             depends_on.append("minio1")
diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json
index 507894534d4..d293cae4dfd 100644
--- a/tests/integration/parallel_skip.json
+++ b/tests/integration/parallel_skip.json
@@ -170,6 +170,18 @@
     "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string",
     "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration",
     "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc",
"test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" + "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection", + "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_ip_change_update_dns_cache", + "test_dns_cache/test.py::test_dns_cache_update", + "test_dns_cache/test.py::test_user_access_ip_change", + "test_dns_cache/test.py::test_host_is_drop_from_cache_after_consecutive_failures", + "test_dns_cache/test.py::test_dns_resolver_filter", + + "test_https_replication/test_change_ip.py::test_replication_when_node_ip_changed", + + "test_host_regexp_multiple_ptr_records/test.py::test_host_regexp_multiple_ptr_v4_fails_with_wrong_resolution", + "test_host_regexp_multiple_ptr_records/test.py::test_host_regexp_multiple_ptr_v4", + "test_host_regexp_multiple_ptr_records/test.py::test_host_regexp_multiple_ptr_v6" ] diff --git a/tests/integration/test_fix_metadata_version/__init__.py b/tests/integration/test_fix_metadata_version/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_fix_metadata_version/configs/config.xml b/tests/integration/test_fix_metadata_version/configs/config.xml new file mode 100644 index 00000000000..4662e6794e3 --- /dev/null +++ b/tests/integration/test_fix_metadata_version/configs/config.xml @@ -0,0 +1,16 @@ + + 9000 + + + + + + + + + default + + + + + diff --git a/tests/integration/test_fix_metadata_version/test.py b/tests/integration/test_fix_metadata_version/test.py new file mode 100644 index 00000000000..085872bba05 --- /dev/null +++ b/tests/integration/test_fix_metadata_version/test.py @@ -0,0 +1,73 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], + stay_alive=True, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_fix_metadata_version(start_cluster): + zookeeper_path = "/clickhouse/test_fix_metadata_version" + replica = "replica1" + replica_path = f"{zookeeper_path}/replicas/{replica}" + + def get_metadata_versions(): + table_metadata_version = int( + node.query( + f""" + SELECT version + FROM system.zookeeper + WHERE path = '{zookeeper_path}' AND name = 'metadata' + """ + ).strip() + ) + + replica_metadata_version = int( + node.query( + f""" + SELECT value + FROM system.zookeeper + WHERE path = '{replica_path}' AND name = 'metadata_version' + """ + ).strip() + ) + + return table_metadata_version, replica_metadata_version + + node.query( + f""" + DROP TABLE IF EXISTS t SYNC; + CREATE TABLE t + ( + `x` UInt32 + ) + ENGINE = ReplicatedMergeTree('{zookeeper_path}', '{replica}') + ORDER BY x + """ + ) + + node.query("ALTER TABLE t (ADD COLUMN `y` UInt32)") + + assert get_metadata_versions() == (1, 1) + + cluster.query_zookeeper(f"set '{replica_path}/metadata_version' '0'") + + assert get_metadata_versions() == (1, 0) + + node.restart_clickhouse() + + assert get_metadata_versions() == (1, 1)