Add getAllFilesMask

This commit is contained in:
divanik 2024-11-18 17:17:26 +00:00
parent 63621bd381
commit f76df37cfa
3 changed files with 11 additions and 2 deletions

View File

@ -135,6 +135,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage/S3)
add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
add_headers_and_sources(dbms Storages/ObjectStorage/Local) add_headers_and_sources(dbms Storages/ObjectStorage/Local)
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes/Iceberg)
add_headers_and_sources(dbms Common/NamedCollections) add_headers_and_sources(dbms Common/NamedCollections)
add_headers_and_sources(dbms Common/Scheduler/Workload) add_headers_and_sources(dbms Common/Scheduler/Workload)

View File

@ -1,5 +1,6 @@
#include "config.h" #include "config.h"
#if USE_AVRO #if USE_AVRO
# include <Columns/ColumnString.h> # include <Columns/ColumnString.h>
@ -88,6 +89,7 @@ enum class DataFileContent : uint8_t
EQUALITY_DELETES = 2, EQUALITY_DELETES = 2,
}; };
/** /**
* Iceberg supports the following data types (see https://iceberg.apache.org/spec/#schemas-and-data-types): * Iceberg supports the following data types (see https://iceberg.apache.org/spec/#schemas-and-data-types):
* - Primitive types: * - Primitive types:
@ -307,7 +309,7 @@ parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_ver
/// Field "schemas" is optional for version 1, but after version 2 was introduced, /// Field "schemas" is optional for version 1, but after version 2 was introduced,
/// in most cases this field is added for new tables in version 1 as well. /// in most cases this field is added for new tables in version 1 as well.
if (!ignore_schema_evolution && metadata_object->has("schemas") if (!ignore_schema_evolution && metadata_object->has("schemas")
&& metadata_object->get("schemas").extract<Poco::JSON::Array::Ptr>()->size() > 1) && metadata_object->get("schemas").extract<Poco::JSON::Array::Ptr>()->size() > 1)
throw Exception( throw Exception(
ErrorCodes::UNSUPPORTED_METHOD, ErrorCodes::UNSUPPORTED_METHOD,
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "

View File

@ -88,7 +88,7 @@ struct SpecificSchemaPartitionInfo
class PartitionPruningProcessor class PartitionPruningProcessor
{ {
public: public:
CommonPartitionInfo getCommonPartitionInfo(Poco::JSON::Array::Ptr partition_specification, const ColumnTuple * big_partition_tuple); CommonPartitionInfo addCommonPartitionInfo(Poco::JSON::Array::Ptr partition_specification, const ColumnTuple * big_partition_tuple);
SpecificSchemaPartitionInfo getSpecificPartitionPruning( SpecificSchemaPartitionInfo getSpecificPartitionPruning(
const CommonPartitionInfo & common_info, const CommonPartitionInfo & common_info,
@ -97,6 +97,8 @@ public:
std::vector<bool> getPruningMask(const SpecificSchemaPartitionInfo & specific_info, const ActionsDAG * filter_dag, ContextPtr context); std::vector<bool> getPruningMask(const SpecificSchemaPartitionInfo & specific_info, const ActionsDAG * filter_dag, ContextPtr context);
std::vector<bool> getAllFilesMask(const ActionsDAG * filter_dag, ContextPtr context);
private: private:
static PartitionTransform getTransform(const String & transform_name) static PartitionTransform getTransform(const String & transform_name)
{ {
@ -189,6 +191,8 @@ private:
std::unordered_map<String, CommonPartitionInfo> common_partition_info_by_manifest_file; std::unordered_map<String, CommonPartitionInfo> common_partition_info_by_manifest_file;
std::map<std::pair<String, Int32>, SpecificSchemaPartitionInfo> specific_partition_info_by_manifest_file_and_schema; std::map<std::pair<String, Int32>, SpecificSchemaPartitionInfo> specific_partition_info_by_manifest_file_and_schema;
std::vector<CommonPartitionInfo> common_partition_infos;
}; };
@ -259,6 +263,8 @@ private:
mutable Strings data_files; mutable Strings data_files;
mutable Strings manifest_files; mutable Strings manifest_files;
PartitionPruningProcessor pruning_processor;
}; };
} }