From 053a81938d196748ffad6f7ca81982d1e491ab98 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 6 May 2024 15:56:43 -0300 Subject: [PATCH 01/30] Fix wrong argument being passed as request protocol for proxy --- src/Storages/StorageURL.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 679946f9aee..2d94453cf4d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -457,7 +457,7 @@ std::pair> StorageURLSource: const auto settings = context_->getSettings(); - auto proxy_config = getProxyConfiguration(http_method); + auto proxy_config = getProxyConfiguration(request_uri.getScheme()); try { @@ -543,10 +543,11 @@ StorageURLSink::StorageURLSink( std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); std::string content_encoding = toContentEncodingName(compression_method); - auto proxy_config = getProxyConfiguration(http_method); + auto poco_uri = Poco::URI(uri); + auto proxy_config = getProxyConfiguration(poco_uri.getScheme()); auto write_buffer = std::make_unique( - HTTPConnectionGroupType::STORAGE, Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config + HTTPConnectionGroupType::STORAGE, poco_uri, http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config ); const auto & settings = context->getSettingsRef(); @@ -1327,6 +1328,7 @@ std::optional IStorageURLBase::tryGetLastModificationTime( .withBufSize(settings.max_read_buffer_size) .withRedirects(settings.max_http_get_redirects) .withHeaders(headers) + .withProxy(proxy_config) .create(credentials); return buf->tryGetLastModificationTime(); From bade6b43dd34906334d285b92c0cb8509700a75a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 8 May 2024 09:33:26 -0300 Subject: [PATCH 02/30] fix wrong originalRequestProtocol in remote resolver causing proxy tunneling to be misconfigured --- .../ProxyConfigurationResolverProvider.cpp | 1 + .../RemoteProxyConfigurationResolver.cpp | 89 +++++++----- src/Common/RemoteProxyConfigurationResolver.h | 17 ++- ...st_proxy_remote_configuration_resolver.cpp | 136 ++++++++++++++++++ 4 files changed, 203 insertions(+), 40 deletions(-) create mode 100644 src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index d15b4d98615..559a77af7c1 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -49,6 +49,7 @@ namespace return std::make_shared( server_configuration, request_protocol, + std::make_unique(), isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index ef972a8e318..cd9f9fa8155 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -16,12 +16,55 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const +{ + /// It should be just empty GET request. + Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); + + const auto & host = endpoint.getHost(); + auto resolved_hosts = DNSResolver::instance().resolveHostAll(host); + + HTTPSessionPtr session; + + for (size_t i = 0; i < resolved_hosts.size(); ++i) + { + auto resolved_endpoint = endpoint; + resolved_endpoint.setHost(resolved_hosts[i].toString()); + session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts); + + try + { + session->sendRequest(request); + break; + } + catch (...) + { + if (i + 1 == resolved_hosts.size()) + throw; + } + } + + Poco::Net::HTTPResponse response; + auto & response_body_stream = session->receiveResponse(response); + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason()); + + String proxy_host; + /// Read proxy host as string from response body. + Poco::StreamCopier::copyToString(response_body_stream, proxy_host); + + return proxy_host; +} + RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol request_protocol_, + std::unique_ptr fetcher_, bool disable_tunneling_for_https_requests_over_http_proxy_ ) -: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), remote_server_configuration(remote_server_configuration_) +: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), + remote_server_configuration(remote_server_configuration_), fetcher(std::move(fetcher_)) { } @@ -29,7 +72,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() { auto logger = getLogger("RemoteProxyConfigurationResolver"); - auto & [endpoint, proxy_protocol, proxy_port, cache_ttl_] = remote_server_configuration; + auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl_] = remote_server_configuration; LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString()); @@ -57,50 +100,18 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() try { - /// It should be just empty GET request. - Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); + const auto proxy_host = fetcher->fetch(endpoint, timeouts); - const auto & host = endpoint.getHost(); - auto resolved_hosts = DNSResolver::instance().resolveHostAll(host); + LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port); - HTTPSessionPtr session; - - for (size_t i = 0; i < resolved_hosts.size(); ++i) - { - auto resolved_endpoint = endpoint; - resolved_endpoint.setHost(resolved_hosts[i].toString()); - session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts); - - try - { - session->sendRequest(request); - break; - } - catch (...) - { - if (i + 1 == resolved_hosts.size()) - throw; - } - } - - Poco::Net::HTTPResponse response; - auto & response_body_stream = session->receiveResponse(response); - - if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason()); - - String proxy_host; - /// Read proxy host as string from response body. - Poco::StreamCopier::copyToString(response_body_stream, proxy_host); - - LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port); + auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string); bool use_tunneling_for_https_requests_over_http_proxy = useTunneling( request_protocol, - cached_config.protocol, + proxy_protocol, disable_tunneling_for_https_requests_over_http_proxy); - cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol); + cached_config.protocol = proxy_protocol; cached_config.host = proxy_host; cached_config.port = proxy_port; cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy; diff --git a/src/Common/RemoteProxyConfigurationResolver.h b/src/Common/RemoteProxyConfigurationResolver.h index 3275202215a..fe2fd56aea8 100644 --- a/src/Common/RemoteProxyConfigurationResolver.h +++ b/src/Common/RemoteProxyConfigurationResolver.h @@ -10,6 +10,19 @@ namespace DB { +struct ConnectionTimeouts; + +struct RemoteProxyHostFetcher +{ + virtual ~RemoteProxyHostFetcher() = default; + virtual std::string fetch(const Poco::URI & uri, const ConnectionTimeouts & timeouts) const = 0; +}; + +struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher +{ + std::string fetch(const Poco::URI & uri, const ConnectionTimeouts & timeouts) const override; +}; + /* * Makes an HTTP GET request to the specified endpoint to obtain a proxy host. * */ @@ -28,7 +41,8 @@ public: RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol request_protocol_, - bool disable_tunneling_for_https_requests_over_http_proxy_ = true); + std::unique_ptr fetcher_, + bool disable_tunneling_for_https_requests_over_http_proxy_ = false); ProxyConfiguration resolve() override; @@ -36,6 +50,7 @@ public: private: RemoteServerConfiguration remote_server_configuration; + std::unique_ptr fetcher; std::mutex cache_mutex; bool cache_valid = false; diff --git a/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp new file mode 100644 index 00000000000..bc9ad5c7205 --- /dev/null +++ b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp @@ -0,0 +1,136 @@ +#include + +#include +#include +#include + +namespace +{ + +struct RemoteProxyHostFetcherMock : public DB::RemoteProxyHostFetcher +{ + explicit RemoteProxyHostFetcherMock(const std::string & return_mock_) : return_mock(return_mock_) {} + + std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) const override + { + return return_mock; + } + + std::string return_mock; +}; + +} + + +namespace DB +{ + +TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP) +{ + const char * proxy_server_mock = "proxy1"; + auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration + { + Poco::URI("not_important"), + "http", + 80, + 10 + }; + + RemoteProxyConfigurationResolver resolver( + remote_server_configuration, + ProxyConfiguration::Protocol::HTTP, + std::make_unique(proxy_server_mock) + ); + + auto configuration = resolver.resolve(); + + ASSERT_EQ(configuration.host, proxy_server_mock); + ASSERT_EQ(configuration.port, 80); + ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP); + ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTP); + ASSERT_EQ(configuration.tunneling, false); +} + +TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS) +{ + const char * proxy_server_mock = "proxy1"; + auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration + { + Poco::URI("not_important"), + "https", + 443, + 10 + }; + + RemoteProxyConfigurationResolver resolver( + remote_server_configuration, + ProxyConfiguration::Protocol::HTTPS, + std::make_unique(proxy_server_mock) + ); + + auto configuration = resolver.resolve(); + + ASSERT_EQ(configuration.host, proxy_server_mock); + ASSERT_EQ(configuration.port, 443); + ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTPS); + ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS); + // tunneling should not be used, https over https. + ASSERT_EQ(configuration.tunneling, false); +} + +TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP) +{ + const char * proxy_server_mock = "proxy1"; + auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration + { + Poco::URI("not_important"), + "http", + 80, + 10 + }; + + RemoteProxyConfigurationResolver resolver( + remote_server_configuration, + ProxyConfiguration::Protocol::HTTPS, + std::make_unique(proxy_server_mock) + ); + + auto configuration = resolver.resolve(); + + ASSERT_EQ(configuration.host, proxy_server_mock); + ASSERT_EQ(configuration.port, 80); + ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP); + ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS); + // tunneling should be used, https over http. + ASSERT_EQ(configuration.tunneling, true); +} + +TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling) +{ + const char * proxy_server_mock = "proxy1"; + auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration + { + Poco::URI("not_important"), + "http", + 80, + 10 + }; + + RemoteProxyConfigurationResolver resolver( + remote_server_configuration, + ProxyConfiguration::Protocol::HTTPS, + std::make_unique(proxy_server_mock), + true /* disable_tunneling_for_https_requests_over_http_proxy_ */ + ); + + auto configuration = resolver.resolve(); + + ASSERT_EQ(configuration.host, proxy_server_mock); + ASSERT_EQ(configuration.port, 80); + ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP); + ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS); + // tunneling should be used, https over http. + ASSERT_EQ(configuration.tunneling, false); +} + +} From a64cf57375950a386e29a1dfe181270bb3ce9a12 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 9 May 2024 11:43:59 -0300 Subject: [PATCH 03/30] modify tests so unexpected http methods in proxy logs are errors --- .../helpers/s3_url_proxy_tests_util.py | 47 +++++++++++++++---- .../configs/config.d/proxy_list.xml | 1 - .../test.py | 6 +-- .../configs/config.d/proxy_list.xml | 1 - .../configs/config.d/storage_conf.xml | 2 +- .../test_s3_storage_conf_new_proxy/test.py | 38 +-------------- .../configs/config.d/storage_conf.xml | 5 +- .../proxy-resolver/resolver.py | 5 +- .../test_s3_storage_conf_proxy/test.py | 37 +-------------- .../configs/config.d/proxy_list.xml | 1 - .../test.py | 6 +-- .../configs/config.d/proxy_list.xml | 5 -- .../test.py | 6 +-- 13 files changed, 54 insertions(+), 106 deletions(-) diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 9059fda08ae..487a2d71d19 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -2,21 +2,26 @@ import os import time +ALL_HTTP_METHODS = {"POST", "PUT", "GET", "HEAD", "CONNECT"} + + def check_proxy_logs( - cluster, proxy_instance, protocol, bucket, http_methods={"POST", "PUT", "GET"} + cluster, proxy_instance, protocol, bucket, requested_http_methods ): for i in range(10): logs = cluster.get_container_logs(proxy_instance) # Check with retry that all possible interactions with Minio are present - for http_method in http_methods: + for http_method in ALL_HTTP_METHODS: if ( logs.find(http_method + f" {protocol}://minio1:9001/root/data/{bucket}") >= 0 ): - return + if http_method not in requested_http_methods: + assert False, f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" + elif http_method in requested_http_methods: + assert False, f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" + time.sleep(1) - else: - assert False, f"{http_methods} method not found in logs of {proxy_instance}" def wait_resolver(cluster): @@ -78,11 +83,35 @@ def perform_simple_queries(node, minio_endpoint): ) -def simple_test(cluster, proxies, protocol, bucket): +def simple_test(cluster, proxy, protocol, bucket): minio_endpoint = build_s3_endpoint(protocol, bucket) - node = cluster.instances[f"{bucket}"] + node = cluster.instances[bucket] perform_simple_queries(node, minio_endpoint) - for proxy in proxies: - check_proxy_logs(cluster, proxy, protocol, bucket) + check_proxy_logs(cluster, proxy, protocol, bucket, ["PUT", "GET", "HEAD"]) + + +def simple_storage_test(cluster, node, proxy, policy): + node.query( + """ + CREATE TABLE s3_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format( + policy + ) + ) + node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") + assert ( + node.query("SELECT * FROM s3_test order by id FORMAT Values") + == "(0,'data'),(1,'data')" + ) + + node.query("DROP TABLE IF EXISTS s3_test SYNC") + + # not checking for POST because it is in a different format + check_proxy_logs(cluster, proxy, "http", policy, ["PUT", "GET"]) diff --git a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/configs/config.d/proxy_list.xml b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/configs/config.d/proxy_list.xml index 1931315897f..9d780a4f2d3 100644 --- a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/configs/config.d/proxy_list.xml +++ b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/configs/config.d/proxy_list.xml @@ -3,7 +3,6 @@ 1 http://proxy1 - http://proxy2 diff --git a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py index ae872a33cd4..6606987bab9 100644 --- a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py +++ b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py @@ -52,12 +52,12 @@ def cluster(): def test_s3_with_https_proxy_list(cluster): - proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "proxy_list_node") + proxy_util.simple_test(cluster, "proxy1", "https", "proxy_list_node") def test_s3_with_https_remote_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "https", "remote_proxy_node") + proxy_util.simple_test(cluster, "proxy1", "https", "remote_proxy_node") def test_s3_with_https_env_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "https", "env_node") + proxy_util.simple_test(cluster, "proxy1", "https", "env_node") diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml index 24c1eb29fbc..84e91495304 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml @@ -2,7 +2,6 @@ http://proxy1 - http://proxy2 diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml index 94ac83b32ac..1d31272a395 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml @@ -3,7 +3,7 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3 minio minio123 diff --git a/tests/integration/test_s3_storage_conf_new_proxy/test.py b/tests/integration/test_s3_storage_conf_new_proxy/test.py index c98eb05a217..ff3685428b5 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_new_proxy/test.py @@ -3,6 +3,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster +import helpers.s3_url_proxy_tests_util as proxy_util @pytest.fixture(scope="module") @@ -26,41 +27,6 @@ def cluster(): cluster.shutdown() -def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}): - for i in range(10): - logs = cluster.get_container_logs(proxy_instance) - # Check with retry that all possible interactions with Minio are present - for http_method in http_methods: - if logs.find(http_method + " http://minio1") >= 0: - return - time.sleep(1) - else: - assert False, f"{http_methods} method not found in logs of {proxy_instance}" - - @pytest.mark.parametrize("policy", ["s3"]) def test_s3_with_proxy_list(cluster, policy): - node = cluster.instances["node"] - - node.query( - """ - CREATE TABLE s3_test ( - id Int64, - data String - ) ENGINE=MergeTree() - ORDER BY id - SETTINGS storage_policy='{}' - """.format( - policy - ) - ) - node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") - assert ( - node.query("SELECT * FROM s3_test order by id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node.query("DROP TABLE IF EXISTS s3_test SYNC") - - for proxy in ["proxy1", "proxy2"]: - check_proxy_logs(cluster, proxy, ["PUT", "GET"]) + proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) \ No newline at end of file diff --git a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml index 132eac4a2a6..39aea7c5507 100644 --- a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml @@ -3,17 +3,16 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3 minio minio123 http://proxy1 - http://proxy2 s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3_with_resolver minio minio123 diff --git a/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py index eaea4c1dab2..8c7611303b8 100644 --- a/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py +++ b/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py @@ -5,10 +5,7 @@ import bottle @bottle.route("/hostname") def index(): - if random.randrange(2) == 0: - return "proxy1" - else: - return "proxy2" + return "proxy1" bottle.run(host="0.0.0.0", port=8080) diff --git a/tests/integration/test_s3_storage_conf_proxy/test.py b/tests/integration/test_s3_storage_conf_proxy/test.py index 6cf612f8259..0e154f2636a 100644 --- a/tests/integration/test_s3_storage_conf_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_proxy/test.py @@ -26,41 +26,6 @@ def cluster(): cluster.shutdown() -def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}): - for i in range(10): - logs = cluster.get_container_logs(proxy_instance) - # Check with retry that all possible interactions with Minio are present - for http_method in http_methods: - if logs.find(http_method + " http://minio1") >= 0: - return - time.sleep(1) - else: - assert False, f"{http_methods} method not found in logs of {proxy_instance}" - - @pytest.mark.parametrize("policy", ["s3", "s3_with_resolver"]) def test_s3_with_proxy_list(cluster, policy): - node = cluster.instances["node"] - - node.query( - """ - CREATE TABLE s3_test ( - id Int64, - data String - ) ENGINE=MergeTree() - ORDER BY id - SETTINGS storage_policy='{}' - """.format( - policy - ) - ) - node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") - assert ( - node.query("SELECT * FROM s3_test order by id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node.query("DROP TABLE IF EXISTS s3_test SYNC") - - for proxy in ["proxy1", "proxy2"]: - check_proxy_logs(cluster, proxy, ["PUT", "GET"]) + proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) \ No newline at end of file diff --git a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml index af5687d88ac..ff207e7166c 100644 --- a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml +++ b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml @@ -2,7 +2,6 @@ http://proxy1 - http://proxy2 \ No newline at end of file diff --git a/tests/integration/test_s3_table_function_with_http_proxy/test.py b/tests/integration/test_s3_table_function_with_http_proxy/test.py index 1619b413099..497b5f19bf6 100644 --- a/tests/integration/test_s3_table_function_with_http_proxy/test.py +++ b/tests/integration/test_s3_table_function_with_http_proxy/test.py @@ -49,12 +49,12 @@ def cluster(): def test_s3_with_http_proxy_list(cluster): - proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "http", "proxy_list_node") + proxy_util.simple_test(cluster, "proxy1", "http", "proxy_list_node") def test_s3_with_http_remote_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "http", "remote_proxy_node") + proxy_util.simple_test(cluster, "proxy1", "http", "remote_proxy_node") def test_s3_with_http_env_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "http", "env_node") + proxy_util.simple_test(cluster, "proxy1", "http", "env_node") diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml index 4dad8a2a682..7e09fa88eca 100644 --- a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml @@ -1,12 +1,7 @@ - - http://proxy1 - http://proxy2 - https://proxy1 - https://proxy2 diff --git a/tests/integration/test_s3_table_function_with_https_proxy/test.py b/tests/integration/test_s3_table_function_with_https_proxy/test.py index 83af407093c..981523b8d6f 100644 --- a/tests/integration/test_s3_table_function_with_https_proxy/test.py +++ b/tests/integration/test_s3_table_function_with_https_proxy/test.py @@ -57,12 +57,12 @@ def cluster(): def test_s3_with_https_proxy_list(cluster): - proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "proxy_list_node") + proxy_util.simple_test(cluster, "proxy1", "https", "proxy_list_node") def test_s3_with_https_remote_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "https", "remote_proxy_node") + proxy_util.simple_test(cluster, "proxy1", "https", "remote_proxy_node") def test_s3_with_https_env_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "https", "env_node") + proxy_util.simple_test(cluster, "proxy1", "https", "env_node") From 0f56e0d1ad44d0c7b0dcd223bd33f3fc3a208f87 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 10 May 2024 16:54:14 -0300 Subject: [PATCH 04/30] fix black --- tests/integration/helpers/s3_url_proxy_tests_util.py | 12 +++++++----- .../test_s3_storage_conf_new_proxy/test.py | 2 +- tests/integration/test_s3_storage_conf_proxy/test.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 487a2d71d19..6dbb90e1c40 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -5,9 +5,7 @@ import time ALL_HTTP_METHODS = {"POST", "PUT", "GET", "HEAD", "CONNECT"} -def check_proxy_logs( - cluster, proxy_instance, protocol, bucket, requested_http_methods -): +def check_proxy_logs(cluster, proxy_instance, protocol, bucket, requested_http_methods): for i in range(10): logs = cluster.get_container_logs(proxy_instance) # Check with retry that all possible interactions with Minio are present @@ -17,9 +15,13 @@ def check_proxy_logs( >= 0 ): if http_method not in requested_http_methods: - assert False, f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" + assert ( + False + ), f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" elif http_method in requested_http_methods: - assert False, f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" + assert + False + ), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" time.sleep(1) diff --git a/tests/integration/test_s3_storage_conf_new_proxy/test.py b/tests/integration/test_s3_storage_conf_new_proxy/test.py index ff3685428b5..720218d7745 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_new_proxy/test.py @@ -29,4 +29,4 @@ def cluster(): @pytest.mark.parametrize("policy", ["s3"]) def test_s3_with_proxy_list(cluster, policy): - proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) \ No newline at end of file + proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) diff --git a/tests/integration/test_s3_storage_conf_proxy/test.py b/tests/integration/test_s3_storage_conf_proxy/test.py index 0e154f2636a..c9ea3f4df5b 100644 --- a/tests/integration/test_s3_storage_conf_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_proxy/test.py @@ -28,4 +28,4 @@ def cluster(): @pytest.mark.parametrize("policy", ["s3", "s3_with_resolver"]) def test_s3_with_proxy_list(cluster, policy): - proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) \ No newline at end of file + proxy_util.simple_storage_test(cluster, cluster.instances["node"], "proxy1", policy) From 2d904dec5fb0b55dcc04b9828714ff44bc18d88d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 10 May 2024 17:22:49 -0300 Subject: [PATCH 05/30] add missing parenthesis --- tests/integration/helpers/s3_url_proxy_tests_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 6dbb90e1c40..5ba865b0910 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -19,7 +19,7 @@ def check_proxy_logs(cluster, proxy_instance, protocol, bucket, requested_http_m False ), f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" elif http_method in requested_http_methods: - assert + assert( False ), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" From ac603d6ba90b951a0c365aafb6940fe0ce4827d4 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 10 May 2024 17:37:26 -0300 Subject: [PATCH 06/30] add space between assert and parenthesis --- tests/integration/helpers/s3_url_proxy_tests_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 5ba865b0910..8228e9f54f7 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -19,7 +19,7 @@ def check_proxy_logs(cluster, proxy_instance, protocol, bucket, requested_http_m False ), f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" elif http_method in requested_http_methods: - assert( + assert ( False ), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" From fa5431030f244510d1585b51146ae2d278fcbf15 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 14 May 2024 15:45:05 +0200 Subject: [PATCH 07/30] simplify remoteproxyconfigurationresolver --- .../RemoteProxyConfigurationResolver.cpp | 23 ++----------------- .../gtest_remote_proxy_host_fetcher_impl.cpp | 3 +++ 2 files changed, 5 insertions(+), 21 deletions(-) create mode 100644 src/Common/tests/gtest_remote_proxy_host_fetcher_impl.cpp diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index cd9f9fa8155..89c7e6ebd65 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -21,28 +21,9 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const /// It should be just empty GET request. Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); - const auto & host = endpoint.getHost(); - auto resolved_hosts = DNSResolver::instance().resolveHostAll(host); + auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts); - HTTPSessionPtr session; - - for (size_t i = 0; i < resolved_hosts.size(); ++i) - { - auto resolved_endpoint = endpoint; - resolved_endpoint.setHost(resolved_hosts[i].toString()); - session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts); - - try - { - session->sendRequest(request); - break; - } - catch (...) - { - if (i + 1 == resolved_hosts.size()) - throw; - } - } + session->sendRequest(request); Poco::Net::HTTPResponse response; auto & response_body_stream = session->receiveResponse(response); diff --git a/src/Common/tests/gtest_remote_proxy_host_fetcher_impl.cpp b/src/Common/tests/gtest_remote_proxy_host_fetcher_impl.cpp new file mode 100644 index 00000000000..18751768ddc --- /dev/null +++ b/src/Common/tests/gtest_remote_proxy_host_fetcher_impl.cpp @@ -0,0 +1,3 @@ +// +// Created by arthur on 14/05/24. +// From 0fe989055284cbf4531aaa67faa92845acc6743a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 15 May 2024 15:09:09 +0200 Subject: [PATCH 08/30] use httpbufferrw --- .../RemoteProxyConfigurationResolver.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 89c7e6ebd65..fec82314aca 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -21,19 +22,14 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const /// It should be just empty GET request. Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); - auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts); - - session->sendRequest(request); - - Poco::Net::HTTPResponse response; - auto & response_body_stream = session->receiveResponse(response); - - if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason()); + auto rw_http_buffer = BuilderRWBufferFromHTTP(endpoint) + .withConnectionGroup(HTTPConnectionGroupType::HTTP) + .withTimeouts(timeouts) + .create({}); String proxy_host; - /// Read proxy host as string from response body. - Poco::StreamCopier::copyToString(response_body_stream, proxy_host); + + readString(proxy_host, *rw_http_buffer); return proxy_host; } From dabd6dc3c676119123763ccb36b16b29b3aa7d71 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 15 May 2024 15:14:13 +0200 Subject: [PATCH 09/30] use readstringuntileof --- src/Common/RemoteProxyConfigurationResolver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index fec82314aca..88da56b29b1 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -29,7 +29,7 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const String proxy_host; - readString(proxy_host, *rw_http_buffer); + readStringUntilEOF(proxy_host, *rw_http_buffer); return proxy_host; } From f36f10dac16dbd691f625618edaa6e331cf48acc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 15 May 2024 15:22:38 +0200 Subject: [PATCH 10/30] remove bad_Arguments --- src/Common/RemoteProxyConfigurationResolver.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 88da56b29b1..350fe754da8 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -12,11 +12,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const { /// It should be just empty GET request. From 2df6b19847b3cffa458d49c2a35e13a3051afc1c Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 16 May 2024 16:26:15 +0000 Subject: [PATCH 11/30] retry 1 --- src/Common/RemoteProxyConfigurationResolver.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 350fe754da8..3f50c300447 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -14,12 +14,13 @@ namespace DB std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const { - /// It should be just empty GET request. - Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); + auto rw_settings = ReadSettings {}; + rw_settings.http_max_tries = 1; auto rw_http_buffer = BuilderRWBufferFromHTTP(endpoint) .withConnectionGroup(HTTPConnectionGroupType::HTTP) .withTimeouts(timeouts) + .withSettings(rw_settings) .create({}); String proxy_host; From ac0ddc9605d5b541a5245cd7fddc21cc2f1a4f47 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 17 May 2024 06:29:11 +0000 Subject: [PATCH 12/30] create local variable for credentials --- src/Common/RemoteProxyConfigurationResolver.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 3f50c300447..8fbe3b85ce9 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -16,12 +16,13 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const { auto rw_settings = ReadSettings {}; rw_settings.http_max_tries = 1; + auto credentials = Poco::Net::HTTPBasicCredentials {}; auto rw_http_buffer = BuilderRWBufferFromHTTP(endpoint) .withConnectionGroup(HTTPConnectionGroupType::HTTP) .withTimeouts(timeouts) .withSettings(rw_settings) - .create({}); + .create(credentials); String proxy_host; From ef4583bf0a41c62d105c53a305fffe32bfffb596 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 21 May 2024 09:14:24 -0300 Subject: [PATCH 13/30] use raw httpsession --- src/Common/RemoteProxyConfigurationResolver.cpp | 17 +++++++---------- src/Common/RemoteProxyConfigurationResolver.h | 4 ++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 8fbe3b85ce9..dfe9e3afd9e 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -14,19 +14,16 @@ namespace DB std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const { - auto rw_settings = ReadSettings {}; - rw_settings.http_max_tries = 1; - auto credentials = Poco::Net::HTTPBasicCredentials {}; + auto request = Poco::Net::HTTPRequest(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); + auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts); - auto rw_http_buffer = BuilderRWBufferFromHTTP(endpoint) - .withConnectionGroup(HTTPConnectionGroupType::HTTP) - .withTimeouts(timeouts) - .withSettings(rw_settings) - .create(credentials); + session->sendRequest(request); - String proxy_host; + Poco::Net::HTTPResponse response; + auto & response_body_stream = session->receiveResponse(response); - readStringUntilEOF(proxy_host, *rw_http_buffer); + std::string proxy_host; + Poco::StreamCopier::copyToString(response_body_stream, proxy_host); return proxy_host; } diff --git a/src/Common/RemoteProxyConfigurationResolver.h b/src/Common/RemoteProxyConfigurationResolver.h index fe2fd56aea8..e8fc1cfed7b 100644 --- a/src/Common/RemoteProxyConfigurationResolver.h +++ b/src/Common/RemoteProxyConfigurationResolver.h @@ -15,12 +15,12 @@ struct ConnectionTimeouts; struct RemoteProxyHostFetcher { virtual ~RemoteProxyHostFetcher() = default; - virtual std::string fetch(const Poco::URI & uri, const ConnectionTimeouts & timeouts) const = 0; + virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const = 0; }; struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher { - std::string fetch(const Poco::URI & uri, const ConnectionTimeouts & timeouts) const override; + std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const override; }; /* From 4a78a0e318d73a83d90188fe392c251e20fb947f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 22 May 2024 17:08:50 +0000 Subject: [PATCH 14/30] remove try catch from remote resolver, propagate exception --- .../RemoteProxyConfigurationResolver.cpp | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index dfe9e3afd9e..2b3223367f2 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -69,34 +69,26 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() .withSendTimeout(1) .withReceiveTimeout(1); - try - { - const auto proxy_host = fetcher->fetch(endpoint, timeouts); + const auto proxy_host = fetcher->fetch(endpoint, timeouts); - LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port); + LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port); - auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string); + auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string); - bool use_tunneling_for_https_requests_over_http_proxy = useTunneling( - request_protocol, - proxy_protocol, - disable_tunneling_for_https_requests_over_http_proxy); + bool use_tunneling_for_https_requests_over_http_proxy = useTunneling( + request_protocol, + proxy_protocol, + disable_tunneling_for_https_requests_over_http_proxy); - cached_config.protocol = proxy_protocol; - cached_config.host = proxy_host; - cached_config.port = proxy_port; - cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy; - cached_config.original_request_protocol = request_protocol; - cache_timestamp = std::chrono::system_clock::now(); - cache_valid = true; + cached_config.protocol = proxy_protocol; + cached_config.host = proxy_host; + cached_config.port = proxy_port; + cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy; + cached_config.original_request_protocol = request_protocol; + cache_timestamp = std::chrono::system_clock::now(); + cache_valid = true; - return cached_config; - } - catch (...) - { - tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy"); - return {}; - } + return cached_config; } void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config) From 5f2cd1740b677369d2cd548b6fc76dd5c28e3f52 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 23 May 2024 11:17:58 +0000 Subject: [PATCH 15/30] increase time to wait for proxy resolver --- tests/integration/helpers/s3_url_proxy_tests_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 8228e9f54f7..6e3a28ee034 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -27,7 +27,7 @@ def check_proxy_logs(cluster, proxy_instance, protocol, bucket, requested_http_m def wait_resolver(cluster): - for i in range(10): + for i in range(15): response = cluster.exec_in_container( cluster.get_container_id("resolver"), [ @@ -40,8 +40,8 @@ def wait_resolver(cluster): if response == "proxy1" or response == "proxy2": return time.sleep(i) - else: - assert False, "Resolver is not up" + + assert False, "Resolver is not up" # Runs simple proxy resolver in python env container. From 5f3778fc1a16bf4ea1da49985d6d3eb422558400 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 23 May 2024 14:57:19 +0000 Subject: [PATCH 16/30] add back response check --- src/Common/RemoteProxyConfigurationResolver.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 2b3223367f2..0020b9875bf 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -22,6 +22,9 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const Poco::Net::HTTPResponse response; auto & response_body_stream = session->receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason()); + std::string proxy_host; Poco::StreamCopier::copyToString(response_body_stream, proxy_host); From 498e25129741037ef5c841ff0747490ce28ccded Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 23 May 2024 16:45:42 +0000 Subject: [PATCH 17/30] extern bad_arguments --- src/Common/RemoteProxyConfigurationResolver.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 0020b9875bf..cc18078557f 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -12,6 +12,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const { auto request = Poco::Net::HTTPRequest(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); From a814f2445f0e012dd30c85b0684affb021db0259 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 24 May 2024 17:46:05 -0300 Subject: [PATCH 18/30] fix cache ttl --- src/Common/ProxyConfigurationResolverProvider.cpp | 2 +- src/Common/RemoteProxyConfigurationResolver.cpp | 4 ++-- src/Common/RemoteProxyConfigurationResolver.h | 3 +-- src/IO/HTTPCommon.cpp | 2 +- src/IO/HTTPCommon.h | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index a362836e6e8..9aa337c5b30 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -43,7 +43,7 @@ namespace endpoint, proxy_scheme, proxy_port, - cache_ttl + std::chrono::seconds {cache_ttl} }; return std::make_shared( diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index cc18078557f..6c49940b64d 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -51,7 +51,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() { auto logger = getLogger("RemoteProxyConfigurationResolver"); - auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl_] = remote_server_configuration; + auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl] = remote_server_configuration; LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString()); @@ -106,7 +106,7 @@ void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & co std::lock_guard lock(cache_mutex); - if (!cache_ttl.count() || !cache_valid) + if (!remote_server_configuration.cache_ttl_.count() || !cache_valid) return; if (std::tie(cached_config.protocol, cached_config.host, cached_config.port) diff --git a/src/Common/RemoteProxyConfigurationResolver.h b/src/Common/RemoteProxyConfigurationResolver.h index e8fc1cfed7b..38af9250110 100644 --- a/src/Common/RemoteProxyConfigurationResolver.h +++ b/src/Common/RemoteProxyConfigurationResolver.h @@ -35,7 +35,7 @@ public: Poco::URI endpoint; String proxy_protocol; unsigned proxy_port; - unsigned cache_ttl_; + const std::chrono::seconds cache_ttl_; }; RemoteProxyConfigurationResolver( @@ -55,7 +55,6 @@ private: std::mutex cache_mutex; bool cache_valid = false; std::chrono::time_point cache_timestamp; - const std::chrono::seconds cache_ttl{0}; ProxyConfiguration cached_config; }; diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 6e1c886b9b0..9704d034b2a 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -48,7 +48,7 @@ HTTPSessionPtr makeHTTPSession( HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_configuration) + const ProxyConfiguration & proxy_configuration) { auto connection_pool = HTTPConnectionPools::instance().getPool(group, uri, proxy_configuration); return connection_pool->getConnection(timeouts); diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 63dffcf6878..3a1fa5bebee 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -61,7 +61,7 @@ HTTPSessionPtr makeHTTPSession( HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_config = {} + const ProxyConfiguration & proxy_config = {} ); bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); From 7a2dc83c8a92ea74224de1532b1e80a4e68adfbc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 24 May 2024 18:04:05 -0300 Subject: [PATCH 19/30] add test to validate cache --- .../ProxyConfigurationResolverProvider.cpp | 2 +- .../RemoteProxyConfigurationResolver.cpp | 6 +-- src/Common/RemoteProxyConfigurationResolver.h | 8 +-- ...st_proxy_remote_configuration_resolver.cpp | 54 +++++++++++++++---- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index 9aa337c5b30..4008ac2d8a5 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -49,7 +49,7 @@ namespace return std::make_shared( server_configuration, request_protocol, - std::make_unique(), + std::make_shared(), isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 6c49940b64d..cb541b493ed 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const +std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) { auto request = Poco::Net::HTTPRequest(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts); @@ -39,11 +39,11 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol request_protocol_, - std::unique_ptr fetcher_, + std::shared_ptr fetcher_, bool disable_tunneling_for_https_requests_over_http_proxy_ ) : ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), - remote_server_configuration(remote_server_configuration_), fetcher(std::move(fetcher_)) + remote_server_configuration(remote_server_configuration_), fetcher(fetcher_) { } diff --git a/src/Common/RemoteProxyConfigurationResolver.h b/src/Common/RemoteProxyConfigurationResolver.h index 38af9250110..4e61a185bb3 100644 --- a/src/Common/RemoteProxyConfigurationResolver.h +++ b/src/Common/RemoteProxyConfigurationResolver.h @@ -15,12 +15,12 @@ struct ConnectionTimeouts; struct RemoteProxyHostFetcher { virtual ~RemoteProxyHostFetcher() = default; - virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const = 0; + virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) = 0; }; struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher { - std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) const override; + std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) override; }; /* @@ -41,7 +41,7 @@ public: RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol request_protocol_, - std::unique_ptr fetcher_, + std::shared_ptr fetcher_, bool disable_tunneling_for_https_requests_over_http_proxy_ = false); ProxyConfiguration resolve() override; @@ -50,7 +50,7 @@ public: private: RemoteServerConfiguration remote_server_configuration; - std::unique_ptr fetcher; + std::shared_ptr fetcher; std::mutex cache_mutex; bool cache_valid = false; diff --git a/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp index bc9ad5c7205..7068e0f2061 100644 --- a/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp +++ b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace { @@ -11,12 +12,14 @@ struct RemoteProxyHostFetcherMock : public DB::RemoteProxyHostFetcher { explicit RemoteProxyHostFetcherMock(const std::string & return_mock_) : return_mock(return_mock_) {} - std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) const override + std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) override { + fetch_count++; return return_mock; } std::string return_mock; + std::size_t fetch_count {0}; }; } @@ -33,13 +36,13 @@ TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP) Poco::URI("not_important"), "http", 80, - 10 + std::chrono::seconds {10} }; RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTP, - std::make_unique(proxy_server_mock) + std::make_shared(proxy_server_mock) ); auto configuration = resolver.resolve(); @@ -59,13 +62,13 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS) Poco::URI("not_important"), "https", 443, - 10 + std::chrono::seconds {10} }; RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, - std::make_unique(proxy_server_mock) + std::make_shared(proxy_server_mock) ); auto configuration = resolver.resolve(); @@ -86,13 +89,13 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP) Poco::URI("not_important"), "http", 80, - 10 + std::chrono::seconds {10} }; RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, - std::make_unique(proxy_server_mock) + std::make_shared(proxy_server_mock) ); auto configuration = resolver.resolve(); @@ -113,13 +116,13 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling) Poco::URI("not_important"), "http", 80, - 10 + std::chrono::seconds {10} }; RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, - std::make_unique(proxy_server_mock), + std::make_shared(proxy_server_mock), true /* disable_tunneling_for_https_requests_over_http_proxy_ */ ); @@ -133,4 +136,37 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling) ASSERT_EQ(configuration.tunneling, false); } +TEST(RemoteProxyConfigurationResolver, SimpleCacheTest) +{ + const char * proxy_server_mock = "proxy1"; + auto cache_ttl = 5u; + auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration + { + Poco::URI("not_important"), + "http", + 80, + std::chrono::seconds {cache_ttl} + }; + + auto fetcher_mock = std::make_shared(proxy_server_mock); + + RemoteProxyConfigurationResolver resolver( + remote_server_configuration, + ProxyConfiguration::Protocol::HTTP, + fetcher_mock + ); + + resolver.resolve(); + resolver.resolve(); + resolver.resolve(); + + ASSERT_EQ(fetcher_mock->fetch_count, 1u); + + sleepForSeconds(cache_ttl * 2); + + resolver.resolve(); + + ASSERT_EQ(fetcher_mock->fetch_count, 2); +} + } From d9c9c4f7ddb975994515f96d17c4561bc528888a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 28 May 2024 13:48:46 -0300 Subject: [PATCH 20/30] use skip_access_check --- src/Common/RemoteProxyConfigurationResolver.cpp | 9 +++++++-- .../configs/config.d/storage_conf.xml | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index cb541b493ed..75120894123 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -14,7 +14,7 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; + extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; } std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) @@ -28,7 +28,12 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const auto & response_body_stream = session->receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason()); + throw HTTPException( + ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, + endpoint.toString(), + response.getStatus(), + response.getReason(), + ""); std::string proxy_host; Poco::StreamCopier::copyToString(response_body_stream, proxy_host); diff --git a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml index 39aea7c5507..cef637211d6 100644 --- a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml @@ -15,6 +15,7 @@ http://minio1:9001/root/data/s3_with_resolver minio minio123 + true