#pragma once #include #if USE_HIVE #include #include #include #include #include #include #include #include namespace DB { class HiveSettings; /** * This class represents table engine for external hdfs files. * Read method is supported for now. */ class StorageHive final : public IStorage, WithContext { public: friend class StorageHiveSource; StorageHive( const String & hive_metastore_url_, const String & hive_database_, const String & hive_table_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment_, const ASTPtr & partition_by_ast_, std::unique_ptr storage_settings_, ContextPtr context_); String getName() const override { return "Hive"; } bool supportsIndexForIn() const override { return true; } bool supportsSubcolumns() const override { return true; } bool mayBenefitFromIndexForIn( const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const override { return true; } Pipe read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) override; NamesAndTypesList getVirtuals() const override; bool supportsSubsetOfColumns() const override; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; private: using FileFormat = IHiveFile::FileFormat; using FileInfo = HiveMetastoreClient::FileInfo; using HiveTableMetadataPtr = HiveMetastoreClient::HiveTableMetadataPtr; enum class PruneLevel { None, /// Do not prune Partition, File, Split, Max = Split, }; static String pruneLevelToString(PruneLevel level) { return String(magic_enum::enum_name(level)); } static ASTPtr extractKeyExpressionList(const ASTPtr & node); static std::vector listDirectory(const String & path, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs); void initMinMaxIndexExpression(); HiveFiles collectHiveFiles( unsigned max_threads, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; HiveFiles collectHiveFilesFromPartition( const Apache::Hadoop::Hive::Partition & partition, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; HiveFilePtr getHiveFileIfNeeded( const FileInfo & file_info, const FieldVector & fields, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; void lazyInitialize(); std::optional totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const; String hive_metastore_url; /// Hive database and table String hive_database; String hive_table; mutable std::mutex init_mutex; bool has_initialized = false; /// Hive table meta std::vector table_schema; Names text_input_field_names; /// Defines schema of hive file, only used when text input format is TEXT String hdfs_namenode_url; String format_name; String compression_method; const ASTPtr partition_by_ast; NamesAndTypesList partition_name_types; Names partition_names; DataTypes partition_types; ExpressionActionsPtr partition_key_expr; ExpressionActionsPtr partition_minmax_idx_expr; NamesAndTypesList hivefile_name_types; ExpressionActionsPtr hivefile_minmax_idx_expr; std::shared_ptr storage_settings; Poco::Logger * log = &Poco::Logger::get("StorageHive"); }; } #endif