#include "ORCBlockInputFormat.h"
#if USE_ORC

// NOTE(review): the six angle-bracket include targets below, and the template
// arguments of std::shared_ptr / std::make_shared further down, were missing
// from the reviewed text and have been reconstructed — confirm against the
// repository before merging.
#include <Formats/FormatFactory.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <arrow/adapters/orc/adapter.h>
#include <arrow/io/memory.h>
#include "ArrowBufferedStreams.h"
#include "ArrowColumnToCHColumn.h"

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_READ_ALL_DATA;
}

/// Forwards the header block and the input buffer to IInputFormat; no ORC reader is created here.
ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_)
    : IInputFormat(std::move(header_), in_)
{
}

/// Produces the next chunk of data.
///
/// On the first call (file_reader is not set yet) an ORC reader is opened over the input
/// buffer, the table is read in a single Read() call and converted into a Chunk that
/// follows the port header. On any later call, while file_reader is still set, an empty
/// Chunk is returned.
///
/// Throws Exception (BAD_ARGUMENTS) if the ORC reader cannot be opened and
/// ParsingException (CANNOT_READ_ALL_DATA) if reading the table fails.
Chunk ORCBlockInputFormat::generate()
{
    Chunk res;
    const Block & header = getPort().getHeader();

    /// A reader that is already set means the table has been requested before:
    /// return the empty chunk.
    if (file_reader)
        return res;

    arrow::Status open_status
        = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(in), arrow::default_memory_pool(), &file_reader);
    if (!open_status.ok())
        throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS);

    std::shared_ptr<arrow::Table> table;
    arrow::Status read_status = file_reader->Read(&table);
    if (!read_status.ok())
        throw ParsingException{"Error while reading ORC data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};

    ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "ORC");
    return res;
}

/// Resets the base parser state and drops the ORC reader, so that the next
/// generate() call opens a new reader instead of returning an empty chunk.
void ORCBlockInputFormat::resetParser()
{
    IInputFormat::resetParser();
    file_reader.reset();
}

/// Registers the "ORC" input format in the factory and marks it as column-oriented.
/// The row-input parameters and format settings passed by the factory are not used.
void registerInputFormatProcessorORC(FormatFactory & factory)
{
    factory.registerInputFormatProcessor(
        "ORC",
        [](ReadBuffer & buf,
           const Block & sample,
           const RowInputFormatParams &,
           const FormatSettings & /* settings */)
        {
            return std::make_shared<ORCBlockInputFormat>(buf, sample);
        });
    factory.markFormatAsColumnOriented("ORC");
}

}

#else

namespace DB
{

class FormatFactory;

/// Built without ORC support: registration is a no-op.
void registerInputFormatProcessorORC(FormatFactory &)
{
}

}

#endif