2020-05-03 18:12:14 +00:00
|
|
|
#include "ArrowBufferedStreams.h"
|
|
|
|
|
2020-05-04 00:58:10 +00:00
|
|
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
2020-05-03 18:12:14 +00:00
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
#include <IO/ReadBufferFromFileDescriptor.h>
|
|
|
|
#include <IO/WriteBufferFromString.h>
|
|
|
|
#include <IO/copyData.h>
|
2020-05-03 18:12:14 +00:00
|
|
|
#include <arrow/buffer.h>
|
2020-05-04 00:52:28 +00:00
|
|
|
#include <arrow/io/api.h>
|
2020-07-13 18:25:49 +00:00
|
|
|
#include <arrow/result.h>
|
2020-05-03 18:12:14 +00:00
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
#include <sys/stat.h>
|
|
|
|
|
2020-07-13 01:11:35 +00:00
|
|
|
|
2020-05-03 18:12:14 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
// Wraps an existing WriteBuffer so it can be handed to Arrow as an output stream.
// The buffer is held by reference; the stream starts in the "open" state.
ArrowBufferedOutputStream::ArrowBufferedOutputStream(WriteBuffer & out_)
    : out(out_)
    , is_open(true)
{
}
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
// Marks the stream as closed. Only the local flag is changed here;
// the wrapped WriteBuffer is not touched by this method.
arrow::Status ArrowBufferedOutputStream::Close()
{
    is_open = false;

    return arrow::Status::OK();
}
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
// Reports the current position, i.e. the running total accumulated by Write().
arrow::Result<int64_t> ArrowBufferedOutputStream::Tell() const
{
    arrow::Result<int64_t> position(total_length);
    return position;
}
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
// Forwards `length` bytes starting at `data` to the wrapped WriteBuffer
// and adds `length` to the byte counter reported by Tell().
arrow::Status ArrowBufferedOutputStream::Write(const void * data, int64_t length)
{
    const auto * bytes = static_cast<const char *>(data);
    out.write(bytes, length);

    total_length += length;

    return arrow::Status::OK();
}
|
|
|
|
|
|
|
|
// Adapts a SeekableReadBuffer to Arrow's random-access file interface.
// `file_size_` is stored as given and later returned verbatim by GetSize();
// the buffer itself is held by reference. The file starts in the "open" state.
RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_)
    : in(in_)
    , file_size(file_size_)
    , is_open(true)
{
}
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
// Returns the file size that was supplied to the constructor.
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::GetSize()
{
    arrow::Result<int64_t> size(file_size);
    return size;
}
|
|
|
|
|
|
|
|
// Marks the file as closed. Only the local flag is changed here;
// the wrapped SeekableReadBuffer is not touched by this method.
arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
{
    is_open = false;

    return arrow::Status::OK();
}
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
// Reports the current position as returned by the wrapped buffer's getPosition().
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Tell() const
{
    const auto position = in.getPosition();
    return arrow::Result<int64_t>(position);
}
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
// Reads up to `nbytes` bytes from the wrapped buffer into the caller-provided
// memory at `out` and returns the count reported by readBig().
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out)
{
    auto * destination = static_cast<char *>(out);

    int64_t bytes_read = in.readBig(destination, nbytes);

    return arrow::Result<int64_t>(bytes_read);
}
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
// Allocates a fresh Arrow buffer of `nbytes` bytes, fills it from the wrapped
// buffer and returns it.
//
// Returns an error status if the allocation fails. Otherwise returns a buffer
// whose size equals the number of bytes actually read: on a short read
// (e.g. near end of file) the result is a slice of the allocation, so callers
// never observe the uninitialised tail.
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
    auto buffer_status = arrow::AllocateBuffer(nbytes);
    ARROW_RETURN_NOT_OK(buffer_status);

    // AllocateBuffer yields a unique_ptr; SliceBuffer needs shared ownership.
    std::shared_ptr<arrow::Buffer> shared_buffer(std::move(*buffer_status));

    const size_t bytes_read = in.readBig(reinterpret_cast<char *>(shared_buffer->mutable_data()), nbytes);

    // Return the trimmed view, not the full allocation. The slice keeps the
    // parent buffer alive, so no copy is made.
    auto read_buffer = arrow::SliceBuffer(shared_buffer, 0, static_cast<int64_t>(bytes_read));
    return arrow::Result<std::shared_ptr<arrow::Buffer>>(std::move(read_buffer));
}
|
|
|
|
|
|
|
|
// Moves the wrapped buffer to the absolute offset `position` (SEEK_SET).
// The value returned by seek() is not inspected; OK is always reported.
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)
{
    in.seek(position, SEEK_SET);

    return arrow::Status::OK();
}
|
|
|
|
|
2021-04-05 19:21:16 +00:00
|
|
|
|
|
|
|
ArrowInputStream::ArrowInputStream(ReadBuffer & in_) : in(in_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reads up to `nbytes` bytes from the wrapped buffer into the caller-provided
// memory at `out` and returns whatever count read() reports.
arrow::Result<int64_t> ArrowInputStream::Read(int64_t nbytes, void * out)
{
    auto * destination = static_cast<char *>(out);

    return in.read(destination, nbytes);
}
|
|
|
|
|
|
|
|
// Copies `nbytes` bytes from the wrapped buffer into a temporary string and
// returns them as an Arrow buffer that owns that string.
// NOTE(review): what happens when fewer than `nbytes` bytes remain depends on
// copyData(in, out, bytes) - confirm against its implementation.
arrow::Result<std::shared_ptr<arrow::Buffer>> ArrowInputStream::Read(int64_t nbytes)
{
    std::string chunk;

    {
        // Scoped so the writer is destroyed before `chunk` is moved from.
        WriteBufferFromString chunk_writer(chunk);
        copyData(in, chunk_writer, nbytes);
    }

    return arrow::Buffer::FromString(std::move(chunk));
}
|
|
|
|
|
|
|
|
// No-op: nothing is done to the wrapped buffer; an OK status is returned.
arrow::Status ArrowInputStream::Abort()
{
    return arrow::Status::OK();
}
|
|
|
|
|
|
|
|
// Reports the current position as the value of the wrapped buffer's count().
arrow::Result<int64_t> ArrowInputStream::Tell() const
{
    return arrow::Result<int64_t>(in.count());
}
|
|
|
|
|
|
|
|
// No-op: the wrapped buffer is left untouched; an OK status is returned.
arrow::Status ArrowInputStream::Close()
{
    return arrow::Status::OK();
}
|
|
|
|
|
|
|
|
bool ArrowInputStream::closed() const
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
// Builds an Arrow RandomAccessFile on top of `in`.
//
// If `in` is a ReadBufferFromFileDescriptor whose descriptor refers to a
// regular file, the buffer is wrapped directly and read lazily. Otherwise the
// remaining contents of `in` are copied into memory and served from there.
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in)
{
    auto * descriptor_buffer = dynamic_cast<ReadBufferFromFileDescriptor *>(&in);
    if (descriptor_buffer != nullptr)
    {
        struct stat file_info;
        const bool stat_ok = ::fstat(descriptor_buffer->getFD(), &file_info) == 0;

        // Only a regular file (i.e. not stdin) is wrapped directly;
        // file_info is inspected only when fstat succeeded.
        if (stat_ok && S_ISREG(file_info.st_mode))
            return std::make_shared<RandomAccessFileFromSeekableReadBuffer>(*descriptor_buffer, file_info.st_size);
    }

    // Fallback: load the entire input into memory.
    std::string contents;

    {
        // Scoped so the writer is destroyed before `contents` is moved from.
        WriteBufferFromString contents_writer(contents);
        copyData(in, contents_writer);
    }

    auto in_memory = arrow::Buffer::FromString(std::move(contents));
    return std::make_shared<arrow::io::BufferReader>(std::move(in_memory));
}
|
|
|
|
|
2021-04-05 19:21:16 +00:00
|
|
|
// Wraps `in` in an ArrowInputStream and returns it through the generic
// arrow::io::InputStream interface. `in` is passed on by reference.
std::shared_ptr<arrow::io::InputStream> asArrowInputStream(ReadBuffer & in)
{
    auto stream = std::make_shared<ArrowInputStream>(in);
    return stream;
}
|
|
|
|
|
2020-05-03 18:12:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|