From 894513f6cd1f8439235dff6fdef5ce5e4565544f Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 23 Aug 2023 18:43:08 +0000 Subject: [PATCH] Fix tests --- src/IO/ReadWriteBufferFromHTTP.cpp | 13 +++++- src/IO/ReadWriteBufferFromHTTP.h | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 7 +-- src/Storages/StorageAzureBlob.cpp | 2 +- src/Storages/StorageURL.cpp | 43 ++++++++----------- src/Storages/StorageURL.h | 2 +- .../test_storage_azure_blob_storage/test.py | 8 ++-- tests/integration/test_storage_hdfs/test.py | 2 +- 8 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 66998a488cb..08d5b1875a3 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -784,10 +784,19 @@ template const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } template -std::optional ReadWriteBufferFromHTTPBase::getLastModificationTime() +std::optional ReadWriteBufferFromHTTPBase::tryGetLastModificationTime() { if (!file_info) - file_info = getFileInfo(); + { + try + { + file_info = getFileInfo(); + } + catch (...) + { + return std::nullopt; + } + } return file_info->last_modified; } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2d2ae5fe724..2e321f84bcd 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -201,7 +201,7 @@ namespace detail const std::string & getCompressionMethod() const; - std::optional getLastModificationTime(); + std::optional tryGetLastModificationTime(); HTTPFileInfo getFileInfo(); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 4bea4a4bb5e..8e1924b2248 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -208,7 +208,7 @@ namespace throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); for (int i = 0; i < ls.length; ++i) { - const String full_path = String(ls.file_info[i].mName); + const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; @@ -218,7 +218,7 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) result.push_back(StorageHDFS::PathWithInfo{ - String(file_name), + String(full_path), StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else if (is_directory && looking_for_directory) @@ -253,7 +253,8 @@ namespace HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); - return LSWithRegexpMatching("/", fs, path_from_uri); + auto res = LSWithRegexpMatching("/", fs, path_from_uri); + return res; } } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 3b7e5fd7236..0c2bae142f2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -992,7 +992,7 @@ Chunk StorageAzureBlobSource::generate() if (const auto * input_format = reader.getInputFormat()) chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath()); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath()); return chunk; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 23c8785ddeb..d27fef53dd8 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -292,7 +292,7 @@ StorageURLSource::StorageURLSource( while (getContext()->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof()); curr_uri = uri_and_buf.first; - auto last_mod_time = uri_and_buf.second->getLastModificationTime(); + auto last_mod_time = uri_and_buf.second->tryGetLastModificationTime(); read_buf = std::move(uri_and_buf.second); if (auto file_progress_callback = getContext()->getFileProgressCallback()) @@ -998,7 +998,7 @@ std::optional IStorageURLBase::tryGetColumnsFromCache( { auto get_last_mod_time = [&]() -> std::optional { - auto last_mod_time = getLastModificationTime(url, headers, credentials, context); + auto last_mod_time = tryGetLastModificationTime(url, headers, credentials, context); /// Some URLs could not have Last-Modified header, in this case we cannot be sure that /// data wasn't changed after adding it's schema to cache. Use schema from cache only if /// special setting for this case is enabled. @@ -1028,7 +1028,7 @@ void IStorageURLBase::addColumnsToCache( schema_cache.addManyColumns(cache_keys, columns); } -std::optional IStorageURLBase::getLastModificationTime( +std::optional IStorageURLBase::tryGetLastModificationTime( const String & url, const HTTPHeaderEntries & headers, const Poco::Net::HTTPBasicCredentials & credentials, @@ -1036,29 +1036,22 @@ std::optional IStorageURLBase::getLastModificationTime( { auto settings = context->getSettingsRef(); - try - { - ReadWriteBufferFromHTTP buf( - Poco::URI(url), - Poco::Net::HTTPRequest::HTTP_GET, - {}, - getHTTPTimeouts(context), - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - context->getReadSettings(), - headers, - &context->getRemoteHostFilter(), - true, - false, - false); + ReadWriteBufferFromHTTP buf( + Poco::URI(url), + Poco::Net::HTTPRequest::HTTP_GET, + {}, + getHTTPTimeouts(context), + credentials, + settings.max_http_get_redirects, + settings.max_read_buffer_size, + context->getReadSettings(), + headers, + &context->getRemoteHostFilter(), + true, + false, + false); - return buf.getLastModificationTime(); - } - catch (...) - { - return std::nullopt; - } + return buf.tryGetLastModificationTime(); } StorageURL::StorageURL( diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1f3258d8b0c..504b0d5de40 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -59,7 +59,7 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); - static std::optional getLastModificationTime( + static std::optional tryGetLastModificationTime( const String & url, const HTTPHeaderEntries & headers, const Poco::Net::HTTPBasicCredentials & credentials, diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index ccfe462d27f..b3429ba49b3 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -44,10 +44,10 @@ def azure_query(node, query, expect_error="false", try_num=10, settings={}): return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", - "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", - "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException: Error while polling for socket ready read", ] retry = False for error in retriable_errors: diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 060440f9083..9af75cf37bd 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -318,7 +318,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\thdfs://hdfs1:9000//file1\n2\tfile2\thdfs://hdfs1:9000//file2\n3\tfile3\thdfs://hdfs1:9000//file3\n" + expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id"