mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
include snapshot_id in iceberg class, compare version, schema and snapshot when comparing IcebergMetadata equality
This commit is contained in:
parent
afea675901
commit
d5e1798474
@ -57,7 +57,8 @@ IcebergMetadata::IcebergMetadata(
|
|||||||
Int32 metadata_version_,
|
Int32 metadata_version_,
|
||||||
Int32 format_version_,
|
Int32 format_version_,
|
||||||
String manifest_list_file_,
|
String manifest_list_file_,
|
||||||
Int32 current_schema_id_,
|
Int32 schema_id_,
|
||||||
|
Int64 snapshot_id_,
|
||||||
DB::NamesAndTypesList schema_)
|
DB::NamesAndTypesList schema_)
|
||||||
: WithContext(context_)
|
: WithContext(context_)
|
||||||
, object_storage(object_storage_)
|
, object_storage(object_storage_)
|
||||||
@ -65,7 +66,8 @@ IcebergMetadata::IcebergMetadata(
|
|||||||
, metadata_version(metadata_version_)
|
, metadata_version(metadata_version_)
|
||||||
, format_version(format_version_)
|
, format_version(format_version_)
|
||||||
, manifest_list_file(std::move(manifest_list_file_))
|
, manifest_list_file(std::move(manifest_list_file_))
|
||||||
, current_schema_id(current_schema_id_)
|
, schema_id(schema_id_)
|
||||||
|
, snapshot_id(snapshot_id_)
|
||||||
, schema(std::move(schema_))
|
, schema(std::move(schema_))
|
||||||
, log(getLogger("IcebergMetadata"))
|
, log(getLogger("IcebergMetadata"))
|
||||||
{
|
{
|
||||||
@ -266,7 +268,7 @@ NamesAndTypesList parseTableSchema(const Poco::JSON::Object::Ptr & metadata_obje
|
|||||||
if (ignore_schema_evolution)
|
if (ignore_schema_evolution)
|
||||||
{
|
{
|
||||||
/// If we ignore schema evolution, we will just use latest schema for all data files.
|
/// If we ignore schema evolution, we will just use latest schema for all data files.
|
||||||
/// Find schema with 'schema-id' equal to 'current_schema_id'.
|
/// Find schema with the correct 'schema_id'.
|
||||||
for (uint32_t i = 0; i != schemas->size(); ++i)
|
for (uint32_t i = 0; i != schemas->size(); ++i)
|
||||||
{
|
{
|
||||||
auto current_schema = schemas->getObject(i);
|
auto current_schema = schemas->getObject(i);
|
||||||
@ -405,17 +407,18 @@ IcebergMetadata::create(ObjectStoragePtr object_storage, ConfigurationObserverPt
|
|||||||
|
|
||||||
String manifest_list_file;
|
String manifest_list_file;
|
||||||
Int32 schema_id = -1;
|
Int32 schema_id = -1;
|
||||||
|
Int64 snapshot_id = -1;
|
||||||
|
|
||||||
// Use the current snapshot (by default) or find the appropriate snapshot based on timestamp
|
// Use the current snapshot (by default) or find the appropriate snapshot based on timestamp
|
||||||
Int64 query_timestamp = local_context->getSettingsRef()[Setting::iceberg_query_at_timestamp_ms];
|
Int64 query_timestamp = local_context->getSettingsRef()[Setting::iceberg_query_at_timestamp_ms];
|
||||||
if (query_timestamp == 0)
|
if (query_timestamp == 0)
|
||||||
{
|
{
|
||||||
// Use current snapshot
|
// Use current snapshot
|
||||||
auto current_snapshot_id = metadata->getValue<Int64>("current-snapshot-id");
|
snapshot_id = metadata->getValue<Int64>("current-snapshot-id");
|
||||||
for (size_t i = 0; i < snapshots->size(); ++i)
|
for (size_t i = 0; i < snapshots->size(); ++i)
|
||||||
{
|
{
|
||||||
const auto snapshot = snapshots->getObject(static_cast<UInt32>(i));
|
const auto snapshot = snapshots->getObject(static_cast<UInt32>(i));
|
||||||
if (snapshot->getValue<Int64>("snapshot-id") == current_snapshot_id)
|
if (snapshot->getValue<Int64>("snapshot-id") == snapshot_id)
|
||||||
{
|
{
|
||||||
const auto path = snapshot->getValue<String>("manifest-list");
|
const auto path = snapshot->getValue<String>("manifest-list");
|
||||||
manifest_list_file = std::filesystem::path(configuration_ptr->getPath()) / "metadata" / std::filesystem::path(path).filename();
|
manifest_list_file = std::filesystem::path(configuration_ptr->getPath()) / "metadata" / std::filesystem::path(path).filename();
|
||||||
@ -424,7 +427,7 @@ IcebergMetadata::create(ObjectStoragePtr object_storage, ConfigurationObserverPt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (manifest_list_file.empty() || schema_id == -1)
|
if (manifest_list_file.empty() || schema_id == -1 || snapshot_id == -1)
|
||||||
{
|
{
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::BAD_ARGUMENTS,
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
@ -448,10 +451,11 @@ IcebergMetadata::create(ObjectStoragePtr object_storage, ConfigurationObserverPt
|
|||||||
const auto path = snapshot->getValue<String>("manifest-list");
|
const auto path = snapshot->getValue<String>("manifest-list");
|
||||||
manifest_list_file = std::filesystem::path(configuration_ptr->getPath()) / "metadata" / std::filesystem::path(path).filename();
|
manifest_list_file = std::filesystem::path(configuration_ptr->getPath()) / "metadata" / std::filesystem::path(path).filename();
|
||||||
schema_id = snapshot->getValue<Int32>("schema-id");
|
schema_id = snapshot->getValue<Int32>("schema-id");
|
||||||
|
snapshot_id = snapshot->getValue<Int64>("snapshot-id");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (manifest_list_file.empty() || schema_id == -1)
|
if (manifest_list_file.empty() || schema_id == -1 || snapshot_id == -1)
|
||||||
{
|
{
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::BAD_ARGUMENTS,
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
@ -466,7 +470,7 @@ IcebergMetadata::create(ObjectStoragePtr object_storage, ConfigurationObserverPt
|
|||||||
auto schema = parseTableSchema(metadata, format_version, schema_id, local_context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]);
|
auto schema = parseTableSchema(metadata, format_version, schema_id, local_context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]);
|
||||||
|
|
||||||
return std::make_unique<IcebergMetadata>(
|
return std::make_unique<IcebergMetadata>(
|
||||||
object_storage, configuration_ptr, local_context, metadata_version, format_version, manifest_list_file, schema_id, schema);
|
object_storage, configuration_ptr, local_context, metadata_version, format_version, manifest_list_file, schema_id, snapshot_id, schema);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -551,7 +555,7 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
Poco::Dynamic::Var json = parser.parse(schema_json_string);
|
Poco::Dynamic::Var json = parser.parse(schema_json_string);
|
||||||
Poco::JSON::Object::Ptr schema_object = json.extract<Poco::JSON::Object::Ptr>();
|
Poco::JSON::Object::Ptr schema_object = json.extract<Poco::JSON::Object::Ptr>();
|
||||||
if (!context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]
|
if (!context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]
|
||||||
&& schema_object->getValue<int>("schema-id") != current_schema_id)
|
&& schema_object->getValue<int>("schema-id") != schema_id)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::UNSUPPORTED_METHOD,
|
ErrorCodes::UNSUPPORTED_METHOD,
|
||||||
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
||||||
|
@ -74,7 +74,8 @@ public:
|
|||||||
Int32 metadata_version_,
|
Int32 metadata_version_,
|
||||||
Int32 format_version_,
|
Int32 format_version_,
|
||||||
String manifest_list_file_,
|
String manifest_list_file_,
|
||||||
Int32 current_schema_id_,
|
Int32 schema_id_,
|
||||||
|
Int64 snapshot_id_,
|
||||||
NamesAndTypesList schema_);
|
NamesAndTypesList schema_);
|
||||||
|
|
||||||
/// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files.
|
/// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files.
|
||||||
@ -91,22 +92,21 @@ public:
|
|||||||
bool operator ==(const IDataLakeMetadata & other) const override
|
bool operator ==(const IDataLakeMetadata & other) const override
|
||||||
{
|
{
|
||||||
const auto * iceberg_metadata = dynamic_cast<const IcebergMetadata *>(&other);
|
const auto * iceberg_metadata = dynamic_cast<const IcebergMetadata *>(&other);
|
||||||
return iceberg_metadata && getManifestListFile() == iceberg_metadata->getManifestListFile();
|
return iceberg_metadata && getVersion() == iceberg_metadata->getVersion();
|
||||||
}
|
}
|
||||||
|
|
||||||
static DataLakeMetadataPtr create(ObjectStoragePtr object_storage, ConfigurationObserverPtr configuration, ContextPtr local_context);
|
static DataLakeMetadataPtr create(ObjectStoragePtr object_storage, ConfigurationObserverPtr configuration, ContextPtr local_context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const std::string& getManifestListFile() const { return manifest_list_file; }
|
std::tuple<Int32, Int32, Int64> getVersion() const { return std::make_tuple(metadata_version, schema_id, snapshot_id); }
|
||||||
|
|
||||||
size_t getVersion() const { return metadata_version; }
|
|
||||||
|
|
||||||
const ObjectStoragePtr object_storage;
|
const ObjectStoragePtr object_storage;
|
||||||
const ConfigurationObserverPtr configuration;
|
const ConfigurationObserverPtr configuration;
|
||||||
Int32 metadata_version;
|
Int32 metadata_version;
|
||||||
Int32 format_version;
|
Int32 format_version;
|
||||||
String manifest_list_file;
|
String manifest_list_file;
|
||||||
Int32 current_schema_id;
|
Int32 schema_id;
|
||||||
|
Int64 snapshot_id;
|
||||||
NamesAndTypesList schema;
|
NamesAndTypesList schema;
|
||||||
mutable Strings data_files;
|
mutable Strings data_files;
|
||||||
std::unordered_map<String, String> column_name_to_physical_name;
|
std::unordered_map<String, String> column_name_to_physical_name;
|
||||||
|
Loading…
Reference in New Issue
Block a user