From 671a401ded9fa1ef18c5b38e5605dc51c65229dc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 15 Nov 2023 12:26:14 +0100 Subject: [PATCH 1/3] Ask linker to remove garbage from external libraries --- CMakeLists.txt | 2 +- contrib/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e548c5a6d0..36fd3a00eba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -164,7 +164,7 @@ if (OS_LINUX) # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries # should not expose their symbols to ODBC drivers and libraries. - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections") endif () if (OS_DARWIN) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 390b0241e7d..0273a93b044 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,7 +1,7 @@ #"${folder}/CMakeLists.txt" Third-party libraries may have substandard code. -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections -fdata-sections") if (WITH_COVERAGE) set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) From bf1098951508dff6f1ec5e1787aaf4cae6f3de25 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 15 Nov 2023 16:54:47 +0100 Subject: [PATCH 2/3] Fix unexpected parts handling (#56693) * fix unexpected parts handling * Automatic style fix * fix --------- Co-authored-by: robot-clickhouse --- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 30 ++++++++++++- src/Storages/StorageReplicatedMergeTree.h | 1 + .../test.py | 43 ++++++++++++++++++- 5 files changed, 77 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d5a82fb032c..b101d9b0c96 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1863,6 +1863,9 @@ try is_async ? "asynchronously" : "synchronously"); } + std::this_thread::sleep_for(std::chrono::milliseconds(static_cast(getSettings()->sleep_before_loading_outdated_parts_ms))); + ThreadFuzzer::maybeInjectSleep(); + /// Acquire shared lock because 'relative_data_path' is used while loading parts. TableLockHolder shared_lock; if (is_async) @@ -1875,6 +1878,7 @@ try while (true) { + ThreadFuzzer::maybeInjectSleep(); PartLoadingTree::NodePtr part; { @@ -3982,7 +3986,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState()); data_parts_indexes.erase(it_part); - if (restore_covered && part->info.level == 0) + if (restore_covered && part->info.level == 0 && part->info.mutation == 0) { LOG_WARNING(log, "Will not recover parts covered by zero-level part {}", part->name); return; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 53876e77376..69307e74d1d 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -167,6 +167,7 @@ struct Settings; M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ M(Bool, allow_floating_point_partition_key, false, "Allow floating point as partition key", 0) \ + M(UInt64, sleep_before_loading_outdated_parts_ms, 0, "For testing. Do not change it.", 0) \ \ /** Experimental/work in progress feature. Unsafe for production. */ \ M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \ diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 74821a9186c..c56887c085f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1327,6 +1327,20 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart( } void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) +{ + if (checkPartsImpl(skip_sanity_checks)) + return; + + /// We failed to check parts in an optimistic way, and now we need all the parts including Outdated parts to check them correctly. + waitForOutdatedPartsToBeLoaded(); + + if (checkPartsImpl(skip_sanity_checks)) + return; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "checkPartsImpl returned false after loading Outdated parts"); +} + +bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) { auto zookeeper = getZooKeeper(); @@ -1422,6 +1436,18 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) continue; } + /// We have uncovered unexpected parts, and we are not sure if we can restore them or not. + /// So we have to exit, load all Outdated parts, and check again. + { + std::lock_guard lock(outdated_data_parts_mutex); + if (!outdated_data_parts_loading_finished) + { + LOG_INFO(log, "Outdated parts are not loaded yet, but we may need them to check if unexpected parts can be recovered. " + "Need retry."); + return false; + } + } + /// Part is unexpected and we don't have covering part: it's suspicious uncovered_unexpected_parts.insert(part->name); uncovered_unexpected_parts_rows += part->rows_count; @@ -1478,7 +1504,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) unexpected_parts_rows - uncovered_unexpected_parts_rows); } - if (unexpected_parts_nonnew_rows > 0 || uncovered_unexpected_parts_rows > 0) + if (unexpected_parts_nonnew_rows > 0 || uncovered_unexpected_parts_rows > 0 || !restorable_unexpected_parts.empty()) { LOG_DEBUG(log, sanity_report_debug_fmt, fmt::join(uncovered_unexpected_parts, ", "), fmt::join(restorable_unexpected_parts, ", "), fmt::join(parts_to_fetch, ", "), fmt::join(covered_unexpected_parts, ", "), fmt::join(expected_parts, ", ")); @@ -1503,6 +1529,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? ", restoring covered parts" : ""); forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered); } + + return true; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 8c90d0e2679..b2a67572adc 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -621,6 +621,7 @@ private: * But if there are too many, throw an exception just in case - it's probably a configuration error. */ void checkParts(bool skip_sanity_checks); + bool checkPartsImpl(bool skip_sanity_checks); /// Synchronize the list of part uuids which are currently pinned. These should be sent to root query executor /// to be used for deduplication. diff --git a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py index 6226240df56..0d009e6b132 100644 --- a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py +++ b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py @@ -81,12 +81,51 @@ def test_unexpected_uncommitted_merge(): detach_table("broken_table") attach_table("broken_table") - assert node.query("SELECT sum(key) FROM broken_table") == "190\n" + # it's not readonly + node.query("INSERT INTO broken_table SELECT 1") + + assert node.query("SELECT sum(key) FROM broken_table") == "191\n" assert ( node.query( "SELECT name FROM system.parts where table = 'broken_table' and active order by name" ) - == "all_0_0_0\nall_1_1_0\n" + == "all_0_0_0\nall_1_1_0\nall_2_2_0\n" + ) + + +def test_unexpected_uncommitted_mutation(): + node.query( + """ + CREATE TABLE broken_table0 (key Int) ENGINE = ReplicatedMergeTree('/tables/broken0', '1') ORDER BY tuple() + SETTINGS max_suspicious_broken_parts = 0, replicated_max_ratio_of_wrong_parts=0, old_parts_lifetime=100500, sleep_before_loading_outdated_parts_ms=10000""" + ) + + node.query("INSERT INTO broken_table0 SELECT number from numbers(10)") + + node.query( + "ALTER TABLE broken_table0 UPDATE key = key * 10 WHERE 1 SETTINGS mutations_sync=1" + ) + + assert node.query("SELECT sum(key) FROM broken_table0") == "450\n" + assert ( + node.query( + "SELECT name FROM system.parts where table = 'broken_table0' and active" + ) + == "all_0_0_0_1\n" + ) + + remove_part_from_zookeeper("/tables/broken0/replicas/1", "all_0_0_0_1") + + detach_table("broken_table0") + attach_table("broken_table0") + + node.query("INSERT INTO broken_table0 SELECT 1") + + # it may remain 45 if the nutation was finalized + sum_key = node.query("SELECT sum(key) FROM broken_table0") + assert sum_key == "46\n" or sum_key == "451\n" + assert "all_0_0_0_1" in node.query( + "SELECT name FROM system.detached_parts where table = 'broken_table0'" ) From d6c58023d50bbe7a77a7497ddc9b1031186bb01c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 15 Nov 2023 18:07:24 +0000 Subject: [PATCH 3/3] Better except for SSL connection failure --- src/Server/TCPHandler.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1da9806b4f5..c0941603d78 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -104,6 +104,7 @@ namespace DB::ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; + extern const int WRONG_PASSWORD; } namespace @@ -1431,8 +1432,11 @@ void TCPHandler::receiveHello() getClientAddress(client_info)); return; } - catch (...) + catch (const Exception & e) { + if (e.code() != DB::ErrorCodes::WRONG_PASSWORD) + throw; + tryLogCurrentException(log, "SSL authentication failed, falling back to password authentication"); } }