2019-02-21 18:36:46 +00:00
|
|
|
#pragma once
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_formats.h"
|
2019-02-21 18:36:46 +00:00
|
|
|
#if USE_PARQUET
|
2019-08-02 16:00:24 +00:00
|
|
|
|
|
|
|
#include <Processors/Formats/IInputFormat.h>
|
2021-07-01 17:59:28 +00:00
|
|
|
#include <Formats/FormatSettings.h>
|
2019-02-21 18:36:46 +00:00
|
|
|
|
2020-05-05 12:56:54 +00:00
|
|
|
namespace parquet::arrow { class FileReader; }
|
2019-02-21 18:36:46 +00:00
|
|
|
|
|
|
|
namespace arrow { class Buffer; }
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-06-02 08:51:07 +00:00
|
|
|
class ArrowColumnToCHColumn;
|
|
|
|
|
2020-05-05 12:56:54 +00:00
|
|
|
class ParquetBlockInputFormat : public IInputFormat
|
2019-02-21 18:36:46 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-07-01 17:59:28 +00:00
|
|
|
ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_);
|
2019-02-21 18:36:46 +00:00
|
|
|
|
2019-11-26 23:46:19 +00:00
|
|
|
void resetParser() override;
|
|
|
|
|
2019-02-21 18:36:46 +00:00
|
|
|
String getName() const override { return "ParquetBlockInputFormat"; }
|
|
|
|
|
|
|
|
protected:
|
|
|
|
Chunk generate() override;
|
|
|
|
|
2020-05-04 00:52:28 +00:00
|
|
|
private:
|
|
|
|
void prepareReader();
|
|
|
|
|
2019-02-21 18:36:46 +00:00
|
|
|
private:
|
|
|
|
std::unique_ptr<parquet::arrow::FileReader> file_reader;
|
|
|
|
int row_group_total = 0;
|
2020-02-23 06:04:58 +00:00
|
|
|
// indices of columns to read from Parquet file
|
|
|
|
std::vector<int> column_indices;
|
2021-06-02 08:51:07 +00:00
|
|
|
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
|
2019-02-21 18:36:46 +00:00
|
|
|
int row_group_current = 0;
|
2021-07-01 17:59:28 +00:00
|
|
|
const FormatSettings format_settings;
|
2019-02-21 18:36:46 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|