From 766130bc98c116d198343f8fee6e0e5527fad712 Mon Sep 17 00:00:00 2001 From: skyoct Date: Tue, 18 Jun 2024 19:16:32 +0800 Subject: [PATCH] feat: add etag for object storage --- .../AzureBlobStorage/AzureObjectStorage.cpp | 2 ++ src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp | 2 +- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 + src/Disks/ObjectStorages/IObjectStorage.h | 1 + src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 4 +++- src/IO/S3/getObjectInfo.cpp | 2 ++ src/IO/S3/getObjectInfo.h | 1 + .../ObjectStorage/StorageObjectStorageSource.cpp | 3 ++- src/Storages/VirtualColumnUtils.cpp | 10 +++++++++- src/Storages/VirtualColumnUtils.h | 2 +- 10 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e7ecf7cd515..e4b85b79ab4 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -86,6 +86,7 @@ private: Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), + blob.Details.ETag.ToString(), {}})); } @@ -186,6 +187,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), + blob.Details.ETag.ToString(), {}})); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..e9114c75077 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -205,7 +205,7 @@ void DiskObjectStorageMetadata::addObject(ObjectStorageKey key, size_t size) } total_size += size; - keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}}); + keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}, {}}); } ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index dcb2af9d4d3..a28f1888020 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -221,6 +221,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM ObjectMetadata{ static_cast(ls.file_info[i].mSize), Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), + "", {}})); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7bc9e4073db..c1402522c5f 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -54,6 +54,7 @@ struct ObjectMetadata { uint64_t size_bytes = 0; Poco::Timestamp last_modified; + std::string etag; ObjectAttributes attributes; }; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 63e11dcd8c8..7b2f71a828e 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -145,7 +145,7 @@ private: auto objects = outcome.GetResult().GetContents(); for (const auto & object : objects) { - ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}; + ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), object.GetETag(), {}}; batch.emplace_back(std::make_shared(object.GetKey(), std::move(metadata))); } @@ -329,6 +329,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet ObjectMetadata{ static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), + object.GetETag(), {}})); if (max_keys) @@ -476,6 +477,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons ObjectMetadata result; result.size_bytes = object_info.size; result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); + result.etag = object_info.etag; result.attributes = object_info.metadata; return result; diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index 9271ad820e4..afa4079c261 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -54,6 +54,8 @@ namespace ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); object_info.last_modification_time = result.GetLastModified().Seconds(); + String etag(result.GetETag.c_str(), result.GetETag().size()); + object_info.etag = etag; if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git a/src/IO/S3/getObjectInfo.h b/src/IO/S3/getObjectInfo.h index 32f34f74069..2fec407f70e 100644 --- a/src/IO/S3/getObjectInfo.h +++ b/src/IO/S3/getObjectInfo.h @@ -15,6 +15,7 @@ struct ObjectInfo { size_t size = 0; time_t last_modification_time = 0; + String etag = ""; std::map metadata = {}; /// Set only if getObjectInfo() is called with `with_metadata = true`. }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2fc6993369d..8554fd9235d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -201,7 +201,8 @@ Chunk StorageObjectStorageSource::generate() .path = getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), .size = object_info.metadata->size_bytes, .filename = &filename, - .last_modified = object_info.metadata->last_modified + .last_modified = object_info.metadata->last_modified, + .etag = &(object_info.metadata->etag) }); return chunk; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..960fff371a7 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -112,7 +112,7 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) NameSet getVirtualNamesForFileLikeStorage() { - return {"_path", "_file", "_size", "_time"}; + return {"_path", "_file", "_size", "_time", "_etag", "_last_modified"}; } VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) @@ -131,6 +131,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); + add_virtual("_etag", std::make_shared(std::make_shared())); return desc; } @@ -226,6 +227,13 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else if (virtual_column.name == "_etag") + { + if (virtual_values.etag) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), (*virtual_values.etag))->convertToFullColumnIfConst()); + else + chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); + } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..dc178277556 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -74,7 +74,7 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; - + const String * etag { nullptr }; }; void addRequestedFileLikeStorageVirtualsToChunk(