ClickHouse/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h

61 lines
1.9 KiB
C++
Raw Normal View History

#pragma once

#include "config_formats.h"

#if USE_ARROW || USE_ORC || USE_PARQUET

#include <memory>
#include <string>
#include <unordered_map>

#include <Columns/ColumnVector.h>
#include <Core/Block.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <Processors/Chunk.h>

#include <arrow/array.h>
#include <arrow/buffer.h>
#include <arrow/table.h>
#include <arrow/type.h>

namespace DB
{
class ArrowColumnToCHColumn
{
private:
# define FOR_ARROW_NUMERIC_TYPES(M) \
M(arrow::Type::UINT8, DB::UInt8) \
M(arrow::Type::INT8, DB::Int8) \
M(arrow::Type::UINT16, DB::UInt16) \
M(arrow::Type::INT16, DB::Int16) \
M(arrow::Type::UINT32, DB::UInt32) \
M(arrow::Type::INT32, DB::Int32) \
M(arrow::Type::UINT64, DB::UInt64) \
M(arrow::Type::INT64, DB::Int64) \
2020-05-05 01:46:04 +00:00
M(arrow::Type::HALF_FLOAT, DB::Float32) \
2019-08-21 14:19:47 +00:00
M(arrow::Type::FLOAT, DB::Float32) \
M(arrow::Type::DOUBLE, DB::Float64)
2021-06-01 07:37:05 +00:00
# define FOR_ARROW_INDEXES_TYPES(M) \
M(arrow::Type::UINT8, DB::UInt8) \
M(arrow::Type::INT8, DB::UInt8) \
M(arrow::Type::UINT16, DB::UInt16) \
M(arrow::Type::INT16, DB::UInt16) \
M(arrow::Type::UINT32, DB::UInt32) \
M(arrow::Type::INT32, DB::UInt32) \
M(arrow::Type::UINT64, DB::UInt64) \
2021-06-01 07:37:05 +00:00
M(arrow::Type::INT64, DB::UInt64)
/// Map {column name : dictionary column}.
/// To avoid converting dictionary from Arrow Dictionary
/// to LowCardinality every chunk we save it and reuse.
std::unordered_map<std::string, ColumnPtr> dictionary_values;
2019-08-21 14:19:47 +00:00
public:
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table,
2020-05-03 18:12:14 +00:00
const Block & header, std::string format_name);
2019-08-21 14:19:47 +00:00
};
}
#endif