2020-05-03 00:54:39 +00:00
|
|
|
#pragma once
|
2019-08-21 14:19:47 +00:00
|
|
|
#include "config_formats.h"
|
|
|
|
|
2020-04-28 19:52:22 +00:00
|
|
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
2019-08-21 14:19:47 +00:00
|
|
|
|
|
|
|
#include <DataTypes/IDataType.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <DataTypes/DataTypeDate.h>
|
|
|
|
#include <DataTypes/DataTypeDateTime.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <arrow/type.h>
|
|
|
|
#include <Columns/ColumnVector.h>
|
|
|
|
#include <arrow/table.h>
|
|
|
|
#include <arrow/array.h>
|
|
|
|
#include <arrow/buffer.h>
|
|
|
|
#include <Processors/Chunk.h>
|
|
|
|
#include <Core/Block.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
class ArrowColumnToCHColumn
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
ArrowColumnToCHColumn(const Block & header_, std::shared_ptr<arrow::Schema> schema_, const std::string & format_name_);
|
|
|
|
|
|
|
|
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
2019-08-21 14:19:47 +00:00
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
private:
|
|
|
|
#define FOR_ARROW_NUMERIC_TYPES(M) \
|
2019-08-21 14:19:47 +00:00
|
|
|
M(arrow::Type::UINT8, DB::UInt8) \
|
|
|
|
M(arrow::Type::INT8, DB::Int8) \
|
|
|
|
M(arrow::Type::UINT16, DB::UInt16) \
|
|
|
|
M(arrow::Type::INT16, DB::Int16) \
|
|
|
|
M(arrow::Type::UINT32, DB::UInt32) \
|
|
|
|
M(arrow::Type::INT32, DB::Int32) \
|
|
|
|
M(arrow::Type::UINT64, DB::UInt64) \
|
|
|
|
M(arrow::Type::INT64, DB::Int64) \
|
2020-05-05 01:46:04 +00:00
|
|
|
M(arrow::Type::HALF_FLOAT, DB::Float32) \
|
2019-08-21 14:19:47 +00:00
|
|
|
M(arrow::Type::FLOAT, DB::Float32) \
|
|
|
|
M(arrow::Type::DOUBLE, DB::Float64)
|
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
#define FOR_ARROW_INDEXES_TYPES(M) \
|
2021-05-25 12:01:28 +00:00
|
|
|
M(arrow::Type::UINT8, DB::UInt8) \
|
|
|
|
M(arrow::Type::INT8, DB::UInt8) \
|
|
|
|
M(arrow::Type::UINT16, DB::UInt16) \
|
|
|
|
M(arrow::Type::INT16, DB::UInt16) \
|
|
|
|
M(arrow::Type::UINT32, DB::UInt32) \
|
|
|
|
M(arrow::Type::INT32, DB::UInt32) \
|
|
|
|
M(arrow::Type::UINT64, DB::UInt64) \
|
2021-06-01 07:37:05 +00:00
|
|
|
M(arrow::Type::INT64, DB::UInt64)
|
2021-05-25 12:01:28 +00:00
|
|
|
|
2019-08-21 14:19:47 +00:00
|
|
|
|
2021-06-07 15:15:58 +00:00
|
|
|
const Block & header;
|
|
|
|
std::unordered_map<std::string, DataTypePtr> name_to_internal_type;
|
|
|
|
const std::string format_name;
|
|
|
|
/// Map {column name : dictionary column}.
|
|
|
|
/// To avoid converting dictionary from Arrow Dictionary
|
|
|
|
/// to LowCardinality every chunk we save it and reuse.
|
|
|
|
std::unordered_map<std::string, ColumnPtr> dictionary_values;
|
|
|
|
};
|
2019-08-21 14:19:47 +00:00
|
|
|
}
|
|
|
|
#endif
|