2020-05-03 00:54:39 +00:00
|
|
|
#pragma once
|
2021-07-24 11:38:42 +00:00
|
|
|
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_formats.h"
|
2019-08-21 14:19:47 +00:00
|
|
|
|
2020-04-28 19:52:22 +00:00
|
|
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
#include <DataTypes/IDataType.h>
|
2021-07-01 17:59:28 +00:00
|
|
|
#include <Core/ColumnWithTypeAndName.h>
|
2021-08-09 14:30:14 +00:00
|
|
|
#include <Core/Block.h>
|
2019-08-21 14:19:47 +00:00
|
|
|
#include <arrow/table.h>
|
2021-07-24 11:38:42 +00:00
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-07-24 11:38:42 +00:00
|
|
|
class Block;
|
|
|
|
class Chunk;
|
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
class ArrowColumnToCHColumn
|
|
|
|
{
|
|
|
|
public:
|
2021-10-31 19:53:24 +00:00
|
|
|
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::ChunkedArray>>;
|
|
|
|
|
2021-11-30 06:52:26 +00:00
|
|
|
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_, bool defaults_for_omitted_fields_);
|
2021-07-01 17:59:28 +00:00
|
|
|
|
2021-08-05 15:09:48 +00:00
|
|
|
/// Constructor that create header by arrow schema. It will be useful for inserting
|
2021-07-01 17:59:28 +00:00
|
|
|
/// data from file without knowing table structure.
|
2021-11-30 06:52:26 +00:00
|
|
|
ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_, bool defaults_for_omitted_fields_);
|
2021-06-07 15:15:58 +00:00
|
|
|
|
2021-11-30 07:44:59 +00:00
|
|
|
/// Convert arrow::Table to chunk. Returns missing header columns not exists in arrow::Table.
|
|
|
|
std::vector<size_t> arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
2019-08-21 14:19:47 +00:00
|
|
|
|
2021-11-30 07:44:59 +00:00
|
|
|
std::vector<size_t> arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr);
|
2021-10-31 19:53:24 +00:00
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
private:
|
2021-08-09 14:30:14 +00:00
|
|
|
const Block header;
|
2021-06-07 15:15:58 +00:00
|
|
|
const std::string format_name;
|
2021-07-01 17:59:28 +00:00
|
|
|
bool import_nested;
|
2021-11-30 06:52:26 +00:00
|
|
|
bool defaults_for_omitted_fields;
|
2021-08-05 15:09:48 +00:00
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
/// Map {column name : dictionary column}.
|
|
|
|
/// To avoid converting dictionary from Arrow Dictionary
|
|
|
|
/// to LowCardinality every chunk we save it and reuse.
|
2021-07-01 17:59:28 +00:00
|
|
|
std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dictionary_values;
|
2021-06-07 15:15:58 +00:00
|
|
|
};
|
2021-07-24 11:38:42 +00:00
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
}
|
2021-07-24 11:38:42 +00:00
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
#endif
|