ClickHouse/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp

82 lines
2.0 KiB
C++
Raw Normal View History

2020-04-28 19:52:22 +00:00
#include "ArrowBlockInputFormat.h"
#if USE_ARROW
#include <Formats/FormatFactory.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <arrow/api.h>
#include <arrow/ipc/reader.h>
#include <arrow/status.h>
2020-05-03 18:12:14 +00:00
#include "ArrowBufferedStreams.h"
2020-04-28 19:52:22 +00:00
#include "ArrowColumnToCHColumn.h"
namespace DB
{
/// Error codes thrown from this translation unit.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_READ_ALL_DATA;
    extern const int BAD_ARGUMENTS;
}
2020-05-03 18:12:14 +00:00
ArrowBlockInputFormat::ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_)
: IInputFormat(header_, in_), format_settings{format_settings_}, arrow_istream{std::make_shared<ArrowBufferedInputStream>(in)}
2020-04-28 19:52:22 +00:00
{
2020-05-03 18:12:14 +00:00
arrow::Status open_status = arrow::ipc::RecordBatchStreamReader::Open(arrow_istream, &reader);
if (!open_status.ok())
throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS);
2020-04-28 19:52:22 +00:00
}
/// Reads everything the Arrow reader still has into a single table and converts
/// it into one Chunk laid out according to the output port header.
///
/// @return the converted chunk, or an empty chunk when the input buffer is at EOF.
/// @throws Exception with BAD_ARGUMENTS if the reader has to be reopened and Arrow
///         fails to open it; CANNOT_READ_ALL_DATA if Arrow fails while reading.
Chunk ArrowBlockInputFormat::generate()
{
    Chunk res;

    if (in.eof())
        return res;

    /// resetParser() releases the reader, so it can be null when the format object
    /// is reused. Reopen it here instead of dereferencing a null pointer below.
    if (!reader)
    {
        /// A fresh stream wrapper is created the same way the constructor does it,
        /// so no state of the previous wrapper is carried over.
        arrow_istream = std::make_shared<ArrowBufferedInputStream>(in);

        arrow::Status open_status = arrow::ipc::RecordBatchStreamReader::Open(arrow_istream, &reader);
        if (!open_status.ok())
            throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS);
    }

    std::shared_ptr<arrow::Table> table;
    arrow::Status read_status = reader->ReadAll(&table);
    if (!read_status.ok())
        throw Exception{"Error while reading Arrow data: " + read_status.ToString(),
                        ErrorCodes::CANNOT_READ_ALL_DATA};

    const Block & header = getPort().getHeader();

    ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "Arrow");
    return res;
}
void ArrowBlockInputFormat::resetParser()
{
IInputFormat::resetParser();
2020-05-03 18:12:14 +00:00
reader.reset();
2020-04-28 19:52:22 +00:00
}
/// Registers the "Arrow" input format processor in the given factory.
/// The registered creator builds an ArrowBlockInputFormat from the read buffer,
/// the sample block and the format settings; the row-input params are not used.
void registerInputFormatProcessorArrow(FormatFactory &factory)
{
    auto make_arrow_input = [](ReadBuffer & buf,
                               const Block & sample,
                               const RowInputFormatParams & /* params */,
                               const FormatSettings & format_settings)
    {
        return std::make_shared<ArrowBlockInputFormat>(buf, sample, format_settings);
    };

    factory.registerInputFormatProcessor("Arrow", make_arrow_input);
}
}
#else
namespace DB
{

class FormatFactory;

/// Variant compiled when USE_ARROW is off: keeps the symbol available
/// but registers nothing.
void registerInputFormatProcessorArrow(FormatFactory & /* factory */) {}

}
2020-05-02 19:32:21 +00:00
#endif