From 024d9382c717153c8c397f69873358450519bb34 Mon Sep 17 00:00:00 2001 From: Anton Kozlov Date: Fri, 11 Nov 2022 14:24:10 +0000 Subject: [PATCH 01/14] CLICKHOUSE-2375 Add interserver DNS retries --- src/Client/ConnectionEstablisher.cpp | 3 +- .../test_interserver_dns_retires/__init__.py | 0 .../configs/remote_servers.xml | 32 ++++++++ .../test_interserver_dns_retires/test.py | 81 +++++++++++++++++++ 4 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_interserver_dns_retires/__init__.py create mode 100644 tests/integration/test_interserver_dns_retires/configs/remote_servers.xml create mode 100644 tests/integration/test_interserver_dns_retires/test.py diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 3ad9f6ba95c..87256d4e8db 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int DNS_ERROR; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; } @@ -90,7 +91,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std:: catch (const Exception & e) { if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT - && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) + && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && e.code() != ErrorCodes::DNS_ERROR) throw; fail_message = getCurrentExceptionMessage(/* with_stacktrace = */ false); diff --git a/tests/integration/test_interserver_dns_retires/__init__.py b/tests/integration/test_interserver_dns_retires/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_interserver_dns_retires/configs/remote_servers.xml b/tests/integration/test_interserver_dns_retires/configs/remote_servers.xml new file mode 100644 index 00000000000..d4e9a2bee24 --- /dev/null +++ b/tests/integration/test_interserver_dns_retires/configs/remote_servers.xml @@ -0,0 +1,32 @@ + + + + + + + node1 + 9000 + r0 + + + node2 + 9000 + r0 + + + + + node3 + 9000 + r0 + + + node3 + 9000 + r0 + + + + + + diff --git a/tests/integration/test_interserver_dns_retires/test.py b/tests/integration/test_interserver_dns_retires/test.py new file mode 100644 index 00000000000..f0c581e6450 --- /dev/null +++ b/tests/integration/test_interserver_dns_retires/test.py @@ -0,0 +1,81 @@ +""" +This test makes sure interserver cluster queries handle invalid DNS +records for replicas. +""" +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, ClickHouseInstance + +from contextlib import contextmanager +import multiprocessing.dummy + + +def bootstrap(cluster: ClickHouseCluster): + node: ClickHouseInstance + for node in cluster.instances.values(): + node_number = int(node.name[-1]) + + # getaddrinfo(...) may hang for a log time without these options. + node.exec_in_container( + [ + "bash", + "-c", + 'echo -e "options timeout:1\noptions attempts:1" >> /etc/resolv.conf', + ], + privileged=True, + user="root", + ) + + node.query(f"CREATE DATABASE IF NOT EXISTS r0") + node.query(f"CREATE TABLE r0.test_data(v UInt64) ENGINE = Memory()") + node.query( + f"INSERT INTO r0.test_data SELECT number + {node_number} * 10 FROM numbers(10)" + ) + node.query( + f"""CREATE TABLE default.test AS r0.test_data ENGINE = Distributed(cluster_missing_replica, 'r0', test_data, rand())""" + ) + + +@contextmanager +def start_cluster(): + cluster = ClickHouseCluster(__file__) + # node1 is missing on purpose to test DNS resolution errors. + # It exists in configs/remote_servers.xml to create the failure condition. + for node in ["node2", "node3", "node4"]: + cluster.add_instance(node, main_configs=["configs/remote_servers.xml"]) + try: + cluster.start() + bootstrap(cluster) + yield cluster + finally: + cluster.shutdown() + + +def test_query(): + with start_cluster() as cluster: + n_queries = 16 + + # thread-based pool + p = multiprocessing.dummy.Pool(n_queries) + + def send_query(x): + try: + # queries start at operational shard 2, and will hit either the + # 'normal' node2 or the missing node1 on shard 1. + node = ( + cluster.instances["node3"] + if (x % 2 == 0) + else cluster.instances["node4"] + ) + # numbers between 0 and 19 are on the first ("broken") shard. + # we need to make sure we're querying them successfully + assert node.query( + "SELECT count() FROM default.test where v < (rand64() % 20)" + ) + return 1 + except QueryRuntimeException as e: + # DNS_ERROR because node1 doesn't exist. + assert 198 == e.returncode + # We shouldn't be getting here due to interserver retries. + raise + + p.map(send_query, range(n_queries)) From b0b865c32e5a0396e5b91df9e16db824351dc9e4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 7 Feb 2023 18:50:31 +0100 Subject: [PATCH 02/14] Resubmit prefetches --- src/Common/ElapsedTimeProfileEventIncrement.h | 21 +- src/Common/ProfileEvents.cpp | 3 + src/Common/Stopwatch.h | 3 +- src/Common/SystemLogBase.cpp | 1 + src/Common/SystemLogBase.h | 1 + .../CachedCompressedReadBuffer.cpp | 4 +- src/Compression/CachedCompressedReadBuffer.h | 2 +- .../CompressedReadBufferFromFile.cpp | 4 +- .../CompressedReadBufferFromFile.h | 2 +- src/Core/Settings.h | 9 + ...chronousReadIndirectBufferFromRemoteFS.cpp | 64 +- ...ynchronousReadIndirectBufferFromRemoteFS.h | 27 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 26 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 7 +- .../IO/ReadIndirectBufferFromRemoteFS.cpp | 2 +- src/Disks/IO/ThreadPoolReader.cpp | 4 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 43 +- src/Disks/ObjectStorages/StoredObject.cpp | 2 +- src/Disks/ObjectStorages/StoredObject.h | 2 + src/IO/AsyncReadCounters.cpp | 37 ++ src/IO/AsyncReadCounters.h | 31 + ...ynchronousReadBufferFromFileDescriptor.cpp | 13 +- ...AsynchronousReadBufferFromFileDescriptor.h | 6 +- src/IO/AsynchronousReader.h | 3 + src/IO/CompressedReadBufferWrapper.h | 2 +- src/IO/PeekableReadBuffer.h | 2 +- src/IO/ReadBuffer.h | 6 +- src/IO/ReadBufferFromFileDescriptor.cpp | 2 +- src/IO/ReadBufferFromFileDescriptor.h | 2 +- src/IO/ReadBufferFromS3.cpp | 3 + src/IO/ReadSettings.h | 2 + src/IO/SeekAvoidingReadBuffer.h | 2 +- src/Interpreters/Context.cpp | 92 ++- src/Interpreters/Context.h | 16 + .../FilesystemReadPrefetchesLog.cpp | 61 ++ .../FilesystemReadPrefetchesLog.h | 48 ++ src/Interpreters/QueryLog.cpp | 7 + src/Interpreters/QueryLog.h | 2 + src/Interpreters/SystemLog.cpp | 15 +- src/Interpreters/SystemLog.h | 4 +- src/Interpreters/executeQuery.cpp | 2 + .../QueryPlan/ReadFromMergeTree.cpp | 45 +- .../HDFS/AsynchronousReadBufferFromHDFS.cpp | 17 +- .../HDFS/AsynchronousReadBufferFromHDFS.h | 9 +- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 4 +- src/Storages/HDFS/ReadBufferFromHDFS.h | 1 - src/Storages/Hive/StorageHive.cpp | 2 +- src/Storages/MergeTree/IMergeTreeReader.h | 2 + .../MergeTreeBaseSelectProcessor.cpp | 92 ++- .../MergeTree/MergeTreeBaseSelectProcessor.h | 24 +- .../MergeTree/MergeTreeBlockReadUtils.cpp | 10 +- .../MergeTree/MergeTreeBlockReadUtils.h | 17 +- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 + .../MergeTree/MergeTreePrefetchedReadPool.cpp | 612 ++++++++++++++++++ .../MergeTree/MergeTreePrefetchedReadPool.h | 128 ++++ src/Storages/MergeTree/MergeTreeReadPool.cpp | 4 +- src/Storages/MergeTree/MergeTreeReadPool.h | 37 +- .../MergeTree/MergeTreeReaderWide.cpp | 114 +++- src/Storages/MergeTree/MergeTreeReaderWide.h | 14 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 +- .../MergeTreeThreadSelectProcessor.cpp | 16 +- .../MergeTreeThreadSelectProcessor.cpp.rej | 55 ++ src/Storages/StorageS3.cpp | 2 +- tests/clickhouse-test.rej | 9 + tests/config/install.sh | 1 + tests/config/install.sh.rej | 9 + tests/config/users.d/prefetch_settings.xml | 9 + .../test_profile_events_s3/test.py | 1 + .../test_ttl_move_memory_usage.py | 25 +- ...test_fix_extra_seek_on_compressed_cache.sh | 3 +- .../01045_order_by_pk_special_storages.sh | 4 +- .../01045_order_by_pk_special_storages.sh.rej | 10 + .../0_stateless/01293_show_settings.reference | 1 + .../01339_client_unrecognized_option.sh | 4 +- ...adaptive_granularity_block_borders.sql.rej | 8 + ...cache_on_write_with_small_segment_size.sql | 6 + utils/debug_remote_fs_performance/debug.sh | 58 ++ 78 files changed, 1731 insertions(+), 215 deletions(-) create mode 100644 src/IO/AsyncReadCounters.cpp create mode 100644 src/IO/AsyncReadCounters.h create mode 100644 src/Interpreters/FilesystemReadPrefetchesLog.cpp create mode 100644 src/Interpreters/FilesystemReadPrefetchesLog.h create mode 100644 src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp create mode 100644 src/Storages/MergeTree/MergeTreePrefetchedReadPool.h create mode 100644 src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp.rej create mode 100644 tests/clickhouse-test.rej create mode 100644 tests/config/install.sh.rej create mode 100644 tests/config/users.d/prefetch_settings.xml create mode 100644 tests/queries/0_stateless/01045_order_by_pk_special_storages.sh.rej create mode 100644 tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql.rej create mode 100644 tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sql create mode 100755 utils/debug_remote_fs_performance/debug.sh diff --git a/src/Common/ElapsedTimeProfileEventIncrement.h b/src/Common/ElapsedTimeProfileEventIncrement.h index 5b4c532f2e1..b30afd24a4c 100644 --- a/src/Common/ElapsedTimeProfileEventIncrement.h +++ b/src/Common/ElapsedTimeProfileEventIncrement.h @@ -20,17 +20,22 @@ struct ProfileEventTimeIncrement explicit ProfileEventTimeIncrement