ClickHouse/src/Processors/Formats/Impl/ArrowBufferedStreams.h

104 lines
2.5 KiB
C++
Raw Normal View History

2020-05-03 18:12:14 +00:00
#pragma once
2021-10-27 23:10:39 +00:00
#include "config_formats.h"
2021-07-28 11:09:17 +00:00
2020-05-04 00:58:10 +00:00
#if USE_ARROW || USE_ORC || USE_PARQUET
2020-05-03 18:12:14 +00:00
2021-10-31 19:53:24 +00:00
#include <atomic>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>

#include <arrow/io/interfaces.h>
/// Signature strings of the supported file formats.
/// NOTE(review): presumably these are what callers pass as the `magic_bytes`
/// argument of asArrowFile() — confirm at the call sites.
#define ORC_MAGIC_BYTES "ORC"
#define PARQUET_MAGIC_BYTES "PAR1"
#define ARROW_MAGIC_BYTES "ARROW1"
2020-05-03 18:12:14 +00:00
namespace DB
{
2020-05-05 12:56:54 +00:00
/// Forward declarations: this header only refers to these types by reference,
/// so their full definitions are not required here.
class ReadBuffer;
class WriteBuffer;
class SeekableReadBuffer;
struct FormatSettings;
2020-05-03 18:12:14 +00:00
class ArrowBufferedOutputStream : public arrow::io::OutputStream
{
public:
explicit ArrowBufferedOutputStream(WriteBuffer & out_);
2020-05-03 18:12:14 +00:00
// FileInterface
2020-05-05 12:56:54 +00:00
arrow::Status Close() override;
2020-05-03 18:12:14 +00:00
arrow::Result<int64_t> Tell() const override;
2020-05-03 18:12:14 +00:00
bool closed() const override { return !is_open; }
// Writable
2020-05-05 12:56:54 +00:00
arrow::Status Write(const void * data, int64_t length) override;
2020-05-03 18:12:14 +00:00
private:
WriteBuffer & out;
2020-05-03 18:12:14 +00:00
int64_t total_length = 0;
bool is_open = false;
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowBufferedOutputStream);
};
class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFile
{
public:
2022-04-26 12:57:02 +00:00
RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_, off_t file_size_);
2022-04-26 12:57:02 +00:00
explicit RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_);
2021-10-31 19:53:24 +00:00
arrow::Result<int64_t> GetSize() override;
arrow::Status Close() override;
arrow::Result<int64_t> Tell() const override;
bool closed() const override { return !is_open; }
arrow::Result<int64_t> Read(int64_t nbytes, void * out) override;
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
arrow::Status Seek(int64_t position) override;
private:
2022-04-26 12:57:02 +00:00
ReadBuffer & in;
SeekableReadBuffer & seekable_in;
2021-10-31 19:53:24 +00:00
std::optional<off_t> file_size;
bool is_open = false;
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
};
2021-04-06 20:28:54 +00:00
class ArrowInputStreamFromReadBuffer : public arrow::io::InputStream
2021-04-05 19:21:16 +00:00
{
public:
2021-04-06 20:28:54 +00:00
explicit ArrowInputStreamFromReadBuffer(ReadBuffer & in);
2021-04-05 19:21:16 +00:00
arrow::Result<int64_t> Read(int64_t nbytes, void* out) override;
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
arrow::Status Abort() override;
arrow::Result<int64_t> Tell() const override;
arrow::Status Close() override;
2021-04-06 20:28:54 +00:00
bool closed() const override { return !is_open; }
2021-04-05 19:21:16 +00:00
private:
ReadBuffer & in;
2021-04-06 20:28:54 +00:00
bool is_open = false;
2021-04-05 19:21:16 +00:00
2021-04-06 20:28:54 +00:00
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
2021-04-05 19:21:16 +00:00
};
/// Produces an arrow::io::RandomAccessFile for the data behind `in`.
/// Only the declaration is visible here; the notes below are assumptions to verify
/// against the definition.
///   in           - source buffer to read from.
///   settings     - format settings; NOTE(review): which fields are consulted is not visible here.
///   is_cancelled - NOTE(review): presumably polled so that a long read can be interrupted — confirm.
///   format_name  - NOTE(review): presumably used only for diagnostics/error messages — confirm.
///   magic_bytes  - NOTE(review): presumably the expected file signature (see the *_MAGIC_BYTES macros) — confirm.
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(
ReadBuffer & in,
const FormatSettings & settings,
std::atomic<int> & is_cancelled,
const std::string & format_name,
const std::string & magic_bytes);
2020-05-03 18:12:14 +00:00
}
#endif