// ClickHouse/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
#include "ORCBlockInputFormat.h"
#if USE_ORC
#include <Formats/FormatFactory.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
2020-05-03 23:19:56 +00:00
#include <arrow/adapters/orc/adapter.h>
2019-08-21 14:19:47 +00:00
#include <arrow/io/memory.h>
#include "ArrowColumnToCHColumn.h"
namespace DB
{
2020-02-25 18:20:08 +00:00
2020-02-25 18:10:48 +00:00
/// Error codes used by this translation unit.
/// They are only declared here (`extern`); the definitions are not in this file.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_READ_ALL_DATA;
}
2019-08-21 14:19:47 +00:00
2020-05-02 19:40:50 +00:00
/// Constructs the ORC input format.
///
/// @param in_      Buffer the ORC data is read from; passed through to IInputFormat.
/// @param header_  Block describing the expected columns; taken by value and moved
///                 into IInputFormat so the caller's copy is not duplicated again.
ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_)
    : IInputFormat(std::move(header_), in_)
{
}
2019-08-21 14:19:47 +00:00
2020-02-25 18:20:08 +00:00
/// Reads the ORC payload from `in` and converts it into a single Chunk.
///
/// The input is copied into the `file_data` member, an Arrow ORC reader is opened
/// over those bytes, the table is read in one call and then converted to a Chunk
/// using the header of this processor's port.
///
/// @return An empty Chunk when `in` is already at EOF; otherwise the converted data.
/// @throws Exception with BAD_ARGUMENTS if the ORC reader cannot be opened,
///         or with CANNOT_READ_ALL_DATA if reading the table fails.
Chunk ORCBlockInputFormat::generate()
{
    Chunk res;

    if (in.eof())
        return res;

    /// Buffer the input in memory: the Arrow reader below needs a random-access source.
    file_data.clear();
    {
        /// Scoped so the write buffer is destroyed before `file_data` is handed to Arrow
        /// (presumably the destructor finalizes the string — confirm against WriteBufferFromString).
        WriteBufferFromString file_buffer(file_data);
        copyData(in, file_buffer);
    }

    /// NOTE(review): BufferReader is given a Buffer reference that wraps `file_data`;
    /// `local_buffer` itself dies at the end of this call while `file_reader` is a member.
    /// This relies on the reader only referring to the bytes owned by `file_data` — confirm.
    auto local_buffer = std::make_unique<arrow::Buffer>(file_data);
    std::shared_ptr<arrow::io::RandomAccessFile> in_stream
        = std::make_shared<arrow::io::BufferReader>(*local_buffer);

    const arrow::Status open_status
        = arrow::adapters::orc::ORCFileReader::Open(in_stream, arrow::default_memory_pool(), &file_reader);
    if (!open_status.ok())
        throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS);

    std::shared_ptr<arrow::Table> table;
    const arrow::Status read_status = file_reader->Read(&table);
    if (!read_status.ok())
        throw Exception("Error while reading ORC data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA);

    const Block & header = getPort().getHeader();
    ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "ORC");

    return res;
}
/// Returns the format to its initial state: the base-class parser state is reset first,
/// then the ORC reader is released and the buffered file contents are discarded.
void ORCBlockInputFormat::resetParser()
{
    IInputFormat::resetParser();

    /// Drop the reader before the bytes it was opened over.
    file_reader.reset();
    file_data.clear();
}
/// Registers the "ORC" input format processor with `factory`.
///
/// The registered creator builds an ORCBlockInputFormat from the read buffer and the
/// sample block; the row-input params and format settings are accepted but unused.
void registerInputFormatProcessorORC(FormatFactory & factory)
{
    factory.registerInputFormatProcessor(
        "ORC",
        [](ReadBuffer & buf,
           const Block & sample,
           const RowInputFormatParams & /* params */,
           const FormatSettings & /* settings */)
        {
            return std::make_shared<ORCBlockInputFormat>(buf, sample);
        });
}
2019-08-21 14:19:47 +00:00
}
#else
namespace DB
{

class FormatFactory;

/// Stub used when the build has no ORC support (USE_ORC is off):
/// keeps the registration symbol available but registers nothing.
void registerInputFormatProcessorORC(FormatFactory & /* factory */)
{
}

}
#endif