Fix tests

avogar 2023-08-23 18:43:08 +00:00
parent 592fa77987
commit 894513f6cd
8 changed files with 41 additions and 38 deletions

View File

@@ -784,10 +784,19 @@ template <typename UpdatableSessionPtr>
 const std::string & ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getCompressionMethod() const { return content_encoding; }
 
 template <typename UpdatableSessionPtr>
-std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getLastModificationTime()
+std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::tryGetLastModificationTime()
 {
     if (!file_info)
-        file_info = getFileInfo();
+    {
+        try
+        {
+            file_info = getFileInfo();
+        }
+        catch (...)
+        {
+            return std::nullopt;
+        }
+    }
 
     return file_info->last_modified;
 }
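
The rename to tryGetLastModificationTime reflects the new contract: a failure while fetching the file info no longer propagates, the caller simply gets an empty optional. A minimal standalone sketch of that pattern (hypothetical helper names, not the actual ClickHouse classes):

    // Sketch of a "try"-prefixed getter that converts any exception from the
    // underlying fetch into an empty std::optional instead of rethrowing it.
    #include <ctime>
    #include <iostream>
    #include <optional>
    #include <stdexcept>

    // Hypothetical stand-in for getFileInfo(): pretend the HEAD request can fail.
    std::time_t fetchLastModifiedOrThrow(bool fail)
    {
        if (fail)
            throw std::runtime_error("HEAD request failed");
        return 1692816188; // fixed timestamp for the example
    }

    std::optional<std::time_t> tryGetLastModificationTime(bool fail)
    {
        try
        {
            return fetchLastModifiedOrThrow(fail);
        }
        catch (...)
        {
            return std::nullopt; // caller decides what "unknown" means
        }
    }

    int main()
    {
        std::cout << tryGetLastModificationTime(false).value_or(-1) << '\n'; // 1692816188
        std::cout << tryGetLastModificationTime(true).value_or(-1) << '\n';  // -1 (no value)
    }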

View File

@@ -201,7 +201,7 @@ namespace detail
     const std::string & getCompressionMethod() const;
-    std::optional<time_t> getLastModificationTime();
+    std::optional<time_t> tryGetLastModificationTime();
     HTTPFileInfo getFileInfo();

View File

@@ -208,7 +208,7 @@ namespace
         throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
     for (int i = 0; i < ls.length; ++i)
     {
-        const String full_path = String(ls.file_info[i].mName);
+        const String full_path = fs::path(ls.file_info[i].mName).lexically_normal();
         const size_t last_slash = full_path.rfind('/');
         const String file_name = full_path.substr(last_slash);
         const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
@@ -218,7 +218,7 @@ namespace
         {
             if (re2::RE2::FullMatch(file_name, matcher))
                 result.push_back(StorageHDFS::PathWithInfo{
-                    String(file_name),
+                    String(full_path),
                     StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)}});
         }
         else if (is_directory && looking_for_directory)
@@ -253,7 +253,8 @@ namespace
     HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
     HDFSFSPtr fs = createHDFSFS(builder.get());
-    return LSWithRegexpMatching("/", fs, path_from_uri);
+    auto res = LSWithRegexpMatching("/", fs, path_from_uri);
+    return res;
 }
 }
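
Normalising the listed names is also why the expected `_path` values in the HDFS integration test at the end of this commit lose their doubled slash. A small standalone illustration of `std::filesystem::path::lexically_normal()`; the exact contents of `ls.file_info[i].mName` are not shown in this diff, the point is only that redundant separators and `.`/`..` segments are collapsed:

    #include <filesystem>
    #include <iostream>

    namespace fs = std::filesystem;

    int main()
    {
        // Redundant separator is collapsed.
        std::cout << fs::path("/dir//file1").lexically_normal() << '\n';   // "/dir/file1"
        // "." segments are removed.
        std::cout << fs::path("/dir/./file2").lexically_normal() << '\n';  // "/dir/file2"
        // ".." segments are resolved lexically (no filesystem access).
        std::cout << fs::path("/dir/../file3").lexically_normal() << '\n'; // "/file3"
    }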

View File

@@ -992,7 +992,7 @@ Chunk StorageAzureBlobSource::generate()
             if (const auto * input_format = reader.getInputFormat())
                 chunk_size = input_format->getApproxBytesReadForChunk();
             progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
-            VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath());
+            VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath());
             return chunk;
         }

View File

@@ -292,7 +292,7 @@ StorageURLSource::StorageURLSource(
         while (getContext()->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof());
         curr_uri = uri_and_buf.first;
-        auto last_mod_time = uri_and_buf.second->getLastModificationTime();
+        auto last_mod_time = uri_and_buf.second->tryGetLastModificationTime();
         read_buf = std::move(uri_and_buf.second);
         if (auto file_progress_callback = getContext()->getFileProgressCallback())
@@ -998,7 +998,7 @@ std::optional<ColumnsDescription> IStorageURLBase::tryGetColumnsFromCache(
     {
         auto get_last_mod_time = [&]() -> std::optional<time_t>
         {
-            auto last_mod_time = getLastModificationTime(url, headers, credentials, context);
+            auto last_mod_time = tryGetLastModificationTime(url, headers, credentials, context);
            /// Some URLs could not have Last-Modified header, in this case we cannot be sure that
            /// data wasn't changed after adding it's schema to cache. Use schema from cache only if
            /// special setting for this case is enabled.
@@ -1028,7 +1028,7 @@ void IStorageURLBase::addColumnsToCache(
     schema_cache.addManyColumns(cache_keys, columns);
 }
 
-std::optional<time_t> IStorageURLBase::getLastModificationTime(
+std::optional<time_t> IStorageURLBase::tryGetLastModificationTime(
     const String & url,
     const HTTPHeaderEntries & headers,
     const Poco::Net::HTTPBasicCredentials & credentials,
@@ -1036,29 +1036,22 @@ std::optional<time_t> IStorageURLBase::getLastModificationTime(
 {
     auto settings = context->getSettingsRef();
 
-    try
-    {
-        ReadWriteBufferFromHTTP buf(
-            Poco::URI(url),
-            Poco::Net::HTTPRequest::HTTP_GET,
-            {},
-            getHTTPTimeouts(context),
-            credentials,
-            settings.max_http_get_redirects,
-            settings.max_read_buffer_size,
-            context->getReadSettings(),
-            headers,
-            &context->getRemoteHostFilter(),
-            true,
-            false,
-            false);
+    ReadWriteBufferFromHTTP buf(
+        Poco::URI(url),
+        Poco::Net::HTTPRequest::HTTP_GET,
+        {},
+        getHTTPTimeouts(context),
+        credentials,
+        settings.max_http_get_redirects,
+        settings.max_read_buffer_size,
+        context->getReadSettings(),
+        headers,
+        &context->getRemoteHostFilter(),
+        true,
+        false,
+        false);
 
-        return buf.getLastModificationTime();
-    }
-    catch (...)
-    {
-        return std::nullopt;
-    }
+    return buf.tryGetLastModificationTime();
 }
 
 StorageURL::StorageURL(
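
A hypothetical sketch (not the real ClickHouse logic) of how an empty optional from tryGetLastModificationTime() can drive the schema-cache decision described in the comment above: when there is no Last-Modified value the cached schema cannot be validated, so it is only trusted if a dedicated setting allows it. The setting flag below is an assumed stand-in, not the actual ClickHouse setting name.

    #include <ctime>
    #include <iostream>
    #include <optional>

    bool may_use_cached_schema(std::optional<std::time_t> last_mod_time,
                               std::time_t cached_at,
                               bool allow_cache_without_last_modified) // assumed setting
    {
        if (!last_mod_time)
            return allow_cache_without_last_modified; // cannot validate, defer to the setting
        return *last_mod_time <= cached_at; // source not modified since the schema was cached
    }

    int main()
    {
        std::cout << may_use_cached_schema(std::nullopt, 1692816188, false) << '\n'; // 0
        std::cout << may_use_cached_schema(std::nullopt, 1692816188, true) << '\n';  // 1
        std::cout << may_use_cached_schema(1692800000, 1692816188, false) << '\n';   // 1
    }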

View File

@@ -59,7 +59,7 @@ public:
     static SchemaCache & getSchemaCache(const ContextPtr & context);
 
-    static std::optional<time_t> getLastModificationTime(
+    static std::optional<time_t> tryGetLastModificationTime(
         const String & url,
         const HTTPHeaderEntries & headers,
         const Poco::Net::HTTPBasicCredentials & credentials,

View File

@@ -44,10 +44,10 @@ def azure_query(node, query, expect_error="false", try_num=10, settings={}):
            return node.query(query, settings=settings)
        except Exception as ex:
            retriable_errors = [
-                "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
-                "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected",
-                "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
-                "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read",
+                "Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
+                "Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected",
+                "Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
+                "Azure::Core::Http::TransportException: Error while polling for socket ready read",
            ]
            retry = False
            for error in retriable_errors:

View File

@@ -318,7 +318,7 @@ def test_virtual_columns(started_cluster):
    hdfs_api.write_data("/file1", "1\n")
    hdfs_api.write_data("/file2", "2\n")
    hdfs_api.write_data("/file3", "3\n")
-    expected = "1\tfile1\thdfs://hdfs1:9000//file1\n2\tfile2\thdfs://hdfs1:9000//file2\n3\tfile3\thdfs://hdfs1:9000//file3\n"
+    expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n"
    assert (
        node1.query(
            "select id, _file as file_name, _path as file_path from virtual_cols order by id"