2020-05-03 18:12:14 +00:00
|
|
|
#pragma once
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_formats.h"
|
2021-07-28 11:09:17 +00:00
|
|
|
|
2020-05-04 00:58:10 +00:00
|
|
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
2020-05-03 18:12:14 +00:00
|
|
|
|
2021-10-31 19:53:24 +00:00
|
|
|
#include <atomic>
#include <memory>
#include <optional>
#include <string>
|
2020-05-03 18:12:14 +00:00
|
|
|
|
2022-06-07 01:58:29 +00:00
|
|
|
#include <arrow/io/interfaces.h>
|
|
|
|
|
2022-04-13 19:27:38 +00:00
|
|
|
/// Magic-bytes string for the ORC format.
/// NOTE(review): presumably what ORC callers pass as `magic_bytes` to asArrowFile() - confirm at the call sites.
#define ORC_MAGIC_BYTES "ORC"
|
|
|
|
/// Magic-bytes string for the Parquet format.
/// NOTE(review): presumably what Parquet callers pass as `magic_bytes` to asArrowFile() - confirm at the call sites.
#define PARQUET_MAGIC_BYTES "PAR1"
|
|
|
|
/// Magic-bytes string for the Arrow format.
/// NOTE(review): presumably what Arrow callers pass as `magic_bytes` to asArrowFile() - confirm at the call sites.
#define ARROW_MAGIC_BYTES "ARROW1"
|
|
|
|
|
2020-05-03 18:12:14 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-05-05 12:56:54 +00:00
|
|
|
class ReadBuffer;
|
|
|
|
class WriteBuffer;
|
|
|
|
|
2021-10-31 19:53:24 +00:00
|
|
|
class SeekableReadBuffer;
|
|
|
|
struct FormatSettings;
|
|
|
|
|
2020-05-03 18:12:14 +00:00
|
|
|
class ArrowBufferedOutputStream : public arrow::io::OutputStream
|
|
|
|
{
|
|
|
|
public:
|
2020-05-04 00:52:28 +00:00
|
|
|
explicit ArrowBufferedOutputStream(WriteBuffer & out_);
|
2020-05-03 18:12:14 +00:00
|
|
|
|
|
|
|
// FileInterface
|
2020-05-05 12:56:54 +00:00
|
|
|
arrow::Status Close() override;
|
2020-05-03 18:12:14 +00:00
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
arrow::Result<int64_t> Tell() const override;
|
2020-05-03 18:12:14 +00:00
|
|
|
|
|
|
|
bool closed() const override { return !is_open; }
|
|
|
|
|
|
|
|
// Writable
|
2020-05-05 12:56:54 +00:00
|
|
|
arrow::Status Write(const void * data, int64_t length) override;
|
2020-05-03 18:12:14 +00:00
|
|
|
|
|
|
|
private:
|
2020-05-04 00:52:28 +00:00
|
|
|
WriteBuffer & out;
|
2020-05-03 18:12:14 +00:00
|
|
|
int64_t total_length = 0;
|
|
|
|
bool is_open = false;
|
|
|
|
|
|
|
|
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowBufferedOutputStream);
|
|
|
|
};
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFile
|
|
|
|
{
|
|
|
|
public:
|
2022-04-26 12:57:02 +00:00
|
|
|
RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_, off_t file_size_);
|
2020-05-04 00:52:28 +00:00
|
|
|
|
2022-04-26 12:57:02 +00:00
|
|
|
explicit RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_);
|
2021-10-31 19:53:24 +00:00
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
arrow::Result<int64_t> GetSize() override;
|
2020-05-04 00:52:28 +00:00
|
|
|
|
|
|
|
arrow::Status Close() override;
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
arrow::Result<int64_t> Tell() const override;
|
2020-05-04 00:52:28 +00:00
|
|
|
|
|
|
|
bool closed() const override { return !is_open; }
|
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
arrow::Result<int64_t> Read(int64_t nbytes, void * out) override;
|
2020-05-04 00:52:28 +00:00
|
|
|
|
2020-07-06 21:36:10 +00:00
|
|
|
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
|
2020-05-04 00:52:28 +00:00
|
|
|
|
|
|
|
arrow::Status Seek(int64_t position) override;
|
|
|
|
|
|
|
|
private:
|
2022-04-26 12:57:02 +00:00
|
|
|
ReadBuffer & in;
|
|
|
|
SeekableReadBuffer & seekable_in;
|
2021-10-31 19:53:24 +00:00
|
|
|
std::optional<off_t> file_size;
|
2020-05-04 00:52:28 +00:00
|
|
|
bool is_open = false;
|
|
|
|
|
|
|
|
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
|
|
|
|
};
|
|
|
|
|
2021-04-06 20:28:54 +00:00
|
|
|
class ArrowInputStreamFromReadBuffer : public arrow::io::InputStream
|
2021-04-05 19:21:16 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-04-06 20:28:54 +00:00
|
|
|
explicit ArrowInputStreamFromReadBuffer(ReadBuffer & in);
|
2021-04-05 19:21:16 +00:00
|
|
|
arrow::Result<int64_t> Read(int64_t nbytes, void* out) override;
|
|
|
|
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
|
|
|
|
arrow::Status Abort() override;
|
|
|
|
arrow::Result<int64_t> Tell() const override;
|
|
|
|
arrow::Status Close() override;
|
2021-04-06 20:28:54 +00:00
|
|
|
bool closed() const override { return !is_open; }
|
2021-04-05 19:21:16 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
ReadBuffer & in;
|
2021-04-06 20:28:54 +00:00
|
|
|
bool is_open = false;
|
2021-04-05 19:21:16 +00:00
|
|
|
|
2021-04-06 20:28:54 +00:00
|
|
|
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
|
2021-04-05 19:21:16 +00:00
|
|
|
};
|
|
|
|
|
2022-04-13 19:27:38 +00:00
|
|
|
/// Returns an arrow::io::RandomAccessFile for the data reachable through `in`.
///
/// Only the declaration is visible in this header, so the parameter notes
/// below are assumptions to be confirmed against the definition:
///  - `settings`: NOTE(review): presumably format options that influence how the file is built.
///  - `is_cancelled`: NOTE(review): presumably polled so that a long read can be abandoned early.
///  - `format_name`: NOTE(review): presumably used for diagnostics.
///  - `magic_bytes`: NOTE(review): presumably one of the *_MAGIC_BYTES strings, used to check the input.
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(
    ReadBuffer & in,
    const FormatSettings & settings,
    std::atomic<int> & is_cancelled,
    const std::string & format_name,
    const std::string & magic_bytes);
|
2020-05-04 00:52:28 +00:00
|
|
|
|
2020-05-03 18:12:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|