2019-08-21 14:19:47 +00:00
|
|
|
#pragma once
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_formats.h"
|
2020-05-03 23:19:56 +00:00
|
|
|
#if USE_ORC
|
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
#include <Processors/Formats/IInputFormat.h>
|
2021-12-15 11:30:57 +00:00
|
|
|
#include <Processors/Formats/ISchemaReader.h>
|
2021-07-01 17:59:28 +00:00
|
|
|
#include <Formats/FormatSettings.h>
|
2019-08-21 14:19:47 +00:00
|
|
|
|
2021-10-31 19:53:24 +00:00
|
|
|
#include <arrow/adapters/orc/adapter.h>
|
|
|
|
|
|
|
|
// Forward declaration of Arrow's ORC file reader, so the class can be named
// (for example as a std::unique_ptr target) without its full definition.
namespace arrow::adapters::orc
{
class ORCFileReader;
}
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2021-06-02 08:51:07 +00:00
|
|
|
|
|
|
|
// Forward declaration; this header only names the type as a std::unique_ptr target.
class ArrowColumnToCHColumn;
|
|
|
|
|
2020-05-05 12:56:54 +00:00
|
|
|
class ORCBlockInputFormat : public IInputFormat
|
2019-08-21 14:19:47 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-07-01 17:59:28 +00:00
|
|
|
ORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_);
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
String getName() const override { return "ORCBlockInputFormat"; }
|
|
|
|
|
2019-11-26 23:46:19 +00:00
|
|
|
void resetParser() override;
|
|
|
|
|
2021-11-30 07:44:59 +00:00
|
|
|
const BlockMissingValues & getMissingValues() const override;
|
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
protected:
|
|
|
|
Chunk generate() override;
|
|
|
|
|
2021-12-27 19:42:56 +00:00
|
|
|
void onCancel() override
|
|
|
|
{
|
|
|
|
is_stopped = 1;
|
|
|
|
}
|
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
private:
|
2022-04-06 08:40:22 +00:00
|
|
|
void prepareReader();
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
// TODO: check that this class implements every part of its parent
|
|
|
|
|
|
|
|
std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
|
2021-04-15 04:01:15 +00:00
|
|
|
|
2021-06-02 08:51:07 +00:00
|
|
|
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
|
2021-05-25 12:01:28 +00:00
|
|
|
|
2021-04-15 04:01:15 +00:00
|
|
|
// indices of columns to read from ORC file
|
|
|
|
std::vector<int> include_indices;
|
|
|
|
|
2021-12-02 08:14:25 +00:00
|
|
|
std::vector<size_t> missing_columns;
|
2021-11-30 07:44:59 +00:00
|
|
|
BlockMissingValues block_missing_values;
|
|
|
|
|
2021-07-01 17:59:28 +00:00
|
|
|
const FormatSettings format_settings;
|
2022-04-06 08:40:22 +00:00
|
|
|
const std::unordered_set<int> & skip_stripes;
|
2021-07-01 17:59:28 +00:00
|
|
|
|
2022-04-06 08:40:22 +00:00
|
|
|
int stripe_total = 0;
|
|
|
|
int stripe_current = 0;
|
2021-12-27 19:42:56 +00:00
|
|
|
|
|
|
|
std::atomic<int> is_stopped{0};
|
2019-08-21 14:19:47 +00:00
|
|
|
};
|
|
|
|
|
2021-12-15 11:30:57 +00:00
|
|
|
class ORCSchemaReader : public ISchemaReader
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
|
|
|
|
|
|
|
|
NamesAndTypesList readSchema() override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
const FormatSettings format_settings;
|
2019-08-21 14:19:47 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
#endif
|