diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 93f4a187656..d664c37bd0f 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -146,6 +146,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +- `_etag` — ETag of the file. Type: `LowCardinality(String)`. If the ETag is unknown, the value is an empty string. For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0d92561d142..f85b5f45b37 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -86,6 +86,7 @@ private: Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), + blob.Details.ETag.ToString(), {}})); } @@ -186,6 +187,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), + blob.Details.ETag.ToString(), {}})); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 56d5d11ef8a..233b13fb908 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -205,7 +205,7 @@ void 
DiskObjectStorageMetadata::addObject(ObjectStorageKey key, size_t size) } total_size += size; - keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}}); + keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}, {}}); } ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 00ef4b63e6f..512cc34ef44 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -222,6 +222,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM ObjectMetadata{ static_cast(ls.file_info[i].mSize), Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), + "", {}})); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 529c79790fd..f3c587a1188 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -54,6 +54,7 @@ struct ObjectMetadata { uint64_t size_bytes = 0; Poco::Timestamp last_modified; + std::string etag; ObjectAttributes attributes; }; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 3c4b4d76bf5..433a0e96d2e 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -146,7 +146,7 @@ private: auto objects = outcome.GetResult().GetContents(); for (const auto & object : objects) { - ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}; + ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), object.GetETag(), {}}; batch.emplace_back(std::make_shared(object.GetKey(), std::move(metadata))); } @@ -332,6 +332,7 @@ void S3ObjectStorage::listObjects(const 
std::string & path, RelativePathsWithMet ObjectMetadata{ static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), + object.GetETag(), {}})); if (max_keys) @@ -479,6 +480,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons ObjectMetadata result; result.size_bytes = object_info.size; result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); + result.etag = object_info.etag; result.attributes = object_info.metadata; return result; diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index 9271ad820e4..a21fb9fce54 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -54,6 +54,7 @@ namespace ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); object_info.last_modification_time = result.GetLastModified().Seconds(); + object_info.etag = result.GetETag(); if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git a/src/IO/S3/getObjectInfo.h b/src/IO/S3/getObjectInfo.h index 32f34f74069..30d4c627d37 100644 --- a/src/IO/S3/getObjectInfo.h +++ b/src/IO/S3/getObjectInfo.h @@ -15,6 +15,7 @@ struct ObjectInfo { size_t size = 0; time_t last_modification_time = 0; + String etag; std::map metadata = {}; /// Set only if getObjectInfo() is called with `with_metadata = true`. }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index e760098f10f..320799c7166 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -204,7 +204,9 @@ Chunk StorageObjectStorageSource::generate() {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, .filename = &filename, - .last_modified = object_info->metadata->last_modified}); + .last_modified = object_info->metadata->last_modified, + .etag = &(object_info->metadata->etag) + }); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ba1f4488005..19694830c4e 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -116,7 +116,7 @@ void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & blo NameSet getVirtualNamesForFileLikeStorage() { - return {"_path", "_file", "_size", "_time"}; + return {"_path", "_file", "_size", "_time", "_etag"}; } VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) @@ -135,6 +135,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); + add_virtual("_etag", std::make_shared(std::make_shared())); return desc; } @@ -230,6 +231,13 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else if (virtual_column.name == "_etag") + { + if (virtual_values.etag) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), (*virtual_values.etag))->convertToFullColumnIfConst()); + else + chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); + } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index d75dc70ae44..1ed369300ff 100644 --- 
a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -83,7 +83,7 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; - + const String * etag { nullptr }; }; void addRequestedFileLikeStorageVirtualsToChunk( diff --git a/tests/queries/0_stateless/02245_s3_virtual_columns.reference b/tests/queries/0_stateless/02245_s3_virtual_columns.reference index 09383c51888..3822f6ffa0f 100644 --- a/tests/queries/0_stateless/02245_s3_virtual_columns.reference +++ b/tests/queries/0_stateless/02245_s3_virtual_columns.reference @@ -11,5 +11,5 @@ create table test_02245_2 (a UInt64, _path Int32) engine = S3(s3_conn, filename= insert into test_02245_2 select 1, 2 settings s3_truncate_on_insert=1; select * from test_02245_2; 1 2 -select _path from test_02245_2; -2 +select _path, isNotNull(_etag) from test_02245_2; +2 1 diff --git a/tests/queries/0_stateless/02245_s3_virtual_columns.sql b/tests/queries/0_stateless/02245_s3_virtual_columns.sql index e86344d2094..a66b212e5c7 100644 --- a/tests/queries/0_stateless/02245_s3_virtual_columns.sql +++ b/tests/queries/0_stateless/02245_s3_virtual_columns.sql @@ -12,4 +12,4 @@ drop table if exists test_02245_2; create table test_02245_2 (a UInt64, _path Int32) engine = S3(s3_conn, filename='test_02245_2', format=Parquet); insert into test_02245_2 select 1, 2 settings s3_truncate_on_insert=1; select * from test_02245_2; -select _path from test_02245_2; +select _path, isNotNull(_etag) from test_02245_2; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8a9a8d2e76c..37094a1a088 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -247,6 +247,7 @@ DoubleDelta Doxygen Durre ECMA +ETag Ecto EdgeAngle EdgeLengthKm @@ -1587,6 +1588,7 @@ enum's enums erfc errorCodeToName +etag 
evalMLMethod exFAT expiryMsec