mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Resolve conflicts, fix build and tests
This commit is contained in:
parent
ee167e9ee8
commit
607d4dcc0b
@ -2,6 +2,7 @@
|
|||||||
#include "ArrowColumnToCHColumn.h"
|
#include "ArrowColumnToCHColumn.h"
|
||||||
|
|
||||||
#if USE_ARROW || USE_ORC || USE_PARQUET
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
||||||
|
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
#include <DataTypes/DataTypesDecimal.h>
|
#include <DataTypes/DataTypesDecimal.h>
|
||||||
@ -10,7 +11,8 @@
|
|||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypeMap.h>
|
#include <DataTypes/DataTypeMap.h>
|
||||||
#include <DataTypes/DataTypeDate.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <DataTypes/DataTypeDate32.h>
|
||||||
#include <DataTypes/NestedUtils.h>
|
#include <DataTypes/NestedUtils.h>
|
||||||
#include <common/DateLUTImpl.h>
|
#include <common/DateLUTImpl.h>
|
||||||
#include <common/types.h>
|
#include <common/types.h>
|
||||||
@ -23,15 +25,39 @@
|
|||||||
#include <Columns/ColumnUnique.h>
|
#include <Columns/ColumnUnique.h>
|
||||||
#include <Columns/ColumnMap.h>
|
#include <Columns/ColumnMap.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
#include <Interpreters/castColumn.h>
|
#include <Interpreters/castColumn.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <fmt/format.h>
|
|
||||||
#include <arrow/builder.h>
|
#include <arrow/builder.h>
|
||||||
#include <arrow/array.h>
|
#include <arrow/array.h>
|
||||||
|
|
||||||
|
|
||||||
|
#define FOR_ARROW_NUMERIC_TYPES(M) \
|
||||||
|
M(arrow::Type::UINT8, DB::UInt8) \
|
||||||
|
M(arrow::Type::INT8, DB::Int8) \
|
||||||
|
M(arrow::Type::UINT16, DB::UInt16) \
|
||||||
|
M(arrow::Type::INT16, DB::Int16) \
|
||||||
|
M(arrow::Type::UINT32, DB::UInt32) \
|
||||||
|
M(arrow::Type::INT32, DB::Int32) \
|
||||||
|
M(arrow::Type::UINT64, DB::UInt64) \
|
||||||
|
M(arrow::Type::INT64, DB::Int64) \
|
||||||
|
M(arrow::Type::HALF_FLOAT, DB::Float32) \
|
||||||
|
M(arrow::Type::FLOAT, DB::Float32) \
|
||||||
|
M(arrow::Type::DOUBLE, DB::Float64)
|
||||||
|
|
||||||
|
#define FOR_ARROW_INDEXES_TYPES(M) \
|
||||||
|
M(arrow::Type::UINT8, DB::UInt8) \
|
||||||
|
M(arrow::Type::INT8, DB::UInt8) \
|
||||||
|
M(arrow::Type::UINT16, DB::UInt16) \
|
||||||
|
M(arrow::Type::INT16, DB::UInt16) \
|
||||||
|
M(arrow::Type::UINT32, DB::UInt32) \
|
||||||
|
M(arrow::Type::INT32, DB::UInt32) \
|
||||||
|
M(arrow::Type::UINT64, DB::UInt64) \
|
||||||
|
M(arrow::Type::INT64, DB::UInt64)
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int UNKNOWN_TYPE;
|
extern const int UNKNOWN_TYPE;
|
||||||
@ -43,11 +69,6 @@ namespace DB
|
|||||||
extern const int UNKNOWN_EXCEPTION;
|
extern const int UNKNOWN_EXCEPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void checkStatus(const arrow::Status & status, const String & column_name, const String & format_name)
|
|
||||||
{
|
|
||||||
if (!status.ok())
|
|
||||||
throw Exception{fmt::format("Error with a {} column \"{}\": {}.", format_name, column_name, status.ToString()), ErrorCodes::UNKNOWN_EXCEPTION};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Inserts numeric data right into internal column data to reduce an overhead
|
/// Inserts numeric data right into internal column data to reduce an overhead
|
||||||
template <typename NumericType, typename VectorType = ColumnVector<NumericType>>
|
template <typename NumericType, typename VectorType = ColumnVector<NumericType>>
|
||||||
@ -83,7 +104,7 @@ namespace DB
|
|||||||
size_t chars_t_size = 0;
|
size_t chars_t_size = 0;
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::BinaryArray & chunk = dynamic_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
const size_t chunk_length = chunk.length();
|
const size_t chunk_length = chunk.length();
|
||||||
|
|
||||||
if (chunk_length > 0)
|
if (chunk_length > 0)
|
||||||
@ -98,7 +119,7 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::BinaryArray & chunk = dynamic_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
|
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
|
||||||
const size_t chunk_length = chunk.length();
|
const size_t chunk_length = chunk.length();
|
||||||
|
|
||||||
@ -126,7 +147,7 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::BooleanArray & chunk = dynamic_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
/// buffers[0] is a null bitmap and buffers[1] are actual values
|
/// buffers[0] is a null bitmap and buffers[1] are actual values
|
||||||
std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1];
|
std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1];
|
||||||
|
|
||||||
@ -138,14 +159,14 @@ namespace DB
|
|||||||
|
|
||||||
static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name)
|
static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name)
|
||||||
{
|
{
|
||||||
auto internal_type = std::make_shared<DataTypeUInt16>();
|
auto internal_type = std::make_shared<DataTypeDate32>();
|
||||||
auto internal_column = internal_type->createColumn();
|
auto internal_column = internal_type->createColumn();
|
||||||
PaddedPODArray<UInt16> & column_data = assert_cast<ColumnVector<UInt16> &>(*internal_column).getData();
|
PaddedPODArray<Int32> & column_data = assert_cast<ColumnVector<Int32> &>(*internal_column).getData();
|
||||||
column_data.reserve(arrow_column->length());
|
column_data.reserve(arrow_column->length());
|
||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
arrow::Date32Array & chunk = dynamic_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
||||||
|
|
||||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||||
{
|
{
|
||||||
@ -155,8 +176,8 @@ namespace DB
|
|||||||
// TODO: will it rollback correctly?
|
// TODO: will it rollback correctly?
|
||||||
throw Exception
|
throw Exception
|
||||||
{
|
{
|
||||||
fmt::format("Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM),
|
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
|
||||||
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE
|
"Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,7 +197,7 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
auto & chunk = static_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
|
auto & chunk = dynamic_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
|
||||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||||
{
|
{
|
||||||
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
|
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
|
||||||
@ -195,7 +216,7 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
auto & chunk = static_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
|
auto & chunk = dynamic_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type());
|
const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type());
|
||||||
|
|
||||||
UInt32 divide = 1;
|
UInt32 divide = 1;
|
||||||
@ -237,7 +258,7 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
auto & chunk = static_cast<DecimalArray &>(*(arrow_column->chunk(chunk_i)));
|
auto & chunk = dynamic_cast<DecimalArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||||
{
|
{
|
||||||
column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast<const DecimalType *>(chunk.Value(value_i))); // TODO: copy column
|
column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast<const DecimalType *>(chunk.Value(value_i))); // TODO: copy column
|
||||||
@ -271,9 +292,9 @@ namespace DB
|
|||||||
|
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
auto arrow_offsets_array = list_chunk.offsets();
|
auto arrow_offsets_array = list_chunk.offsets();
|
||||||
auto & arrow_offsets = static_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
auto & arrow_offsets = dynamic_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
||||||
auto start = offsets_data.back();
|
auto start = offsets_data.back();
|
||||||
for (int64_t i = 1; i < arrow_offsets.length(); ++i)
|
for (int64_t i = 1; i < arrow_offsets.length(); ++i)
|
||||||
offsets_data.emplace_back(start + arrow_offsets.Value(i));
|
offsets_data.emplace_back(start + arrow_offsets.Value(i));
|
||||||
@ -303,7 +324,7 @@ namespace DB
|
|||||||
array_vector.reserve(arrow_column->num_chunks());
|
array_vector.reserve(arrow_column->num_chunks());
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
std::shared_ptr<arrow::Array> chunk = list_chunk.values();
|
std::shared_ptr<arrow::Array> chunk = list_chunk.values();
|
||||||
array_vector.emplace_back(std::move(chunk));
|
array_vector.emplace_back(std::move(chunk));
|
||||||
}
|
}
|
||||||
@ -374,11 +395,11 @@ namespace DB
|
|||||||
case arrow::Type::STRUCT:
|
case arrow::Type::STRUCT:
|
||||||
{
|
{
|
||||||
auto arrow_type = arrow_column->type();
|
auto arrow_type = arrow_column->type();
|
||||||
auto arrow_struct_type = assert_cast<arrow::StructType *>(arrow_type.get());
|
auto * arrow_struct_type = assert_cast<arrow::StructType *>(arrow_type.get());
|
||||||
std::vector<arrow::ArrayVector> nested_arrow_columns(arrow_struct_type->num_fields());
|
std::vector<arrow::ArrayVector> nested_arrow_columns(arrow_struct_type->num_fields());
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::StructArray & struct_chunk = static_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::StructArray & struct_chunk = dynamic_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
for (int i = 0; i < arrow_struct_type->num_fields(); ++i)
|
for (int i = 0; i < arrow_struct_type->num_fields(); ++i)
|
||||||
nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
|
nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
|
||||||
}
|
}
|
||||||
@ -409,7 +430,7 @@ namespace DB
|
|||||||
arrow::ArrayVector dict_array;
|
arrow::ArrayVector dict_array;
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::DictionaryArray & dict_chunk = dynamic_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
dict_array.emplace_back(dict_chunk.dictionary());
|
dict_array.emplace_back(dict_chunk.dictionary());
|
||||||
}
|
}
|
||||||
auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
|
auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
|
||||||
@ -426,7 +447,7 @@ namespace DB
|
|||||||
arrow::ArrayVector indexes_array;
|
arrow::ArrayVector indexes_array;
|
||||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
arrow::DictionaryArray & dict_chunk = dynamic_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
indexes_array.emplace_back(dict_chunk.indices());
|
indexes_array.emplace_back(dict_chunk.indices());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -447,33 +468,49 @@ namespace DB
|
|||||||
default:
|
default:
|
||||||
throw Exception
|
throw Exception
|
||||||
{
|
{
|
||||||
fmt::format(R"(Unsupported {} type "{}" of an input column "{}".)", format_name, arrow_column->type()->name(), column_name),
|
ErrorCodes::UNKNOWN_TYPE,
|
||||||
ErrorCodes::UNKNOWN_TYPE
|
"Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
|
|
||||||
{
|
// Creating CH header by arrow schema. Will be useful in task about inserting
|
||||||
ColumnsWithTypeAndName sample_columns;
|
// data from file without knowing table structure.
|
||||||
for (const auto & field : schema.fields())
|
//
|
||||||
{
|
//static void checkStatus(const arrow::Status & status, const String & column_name, const String & format_name)
|
||||||
/// Create an empty arrow column by its type and convert it to a ClickHouse column.
|
//{
|
||||||
arrow::MemoryPool* pool = arrow::default_memory_pool();
|
// if (!status.ok())
|
||||||
std::unique_ptr<arrow::ArrayBuilder> array_builder;
|
// throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
|
||||||
arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
|
//}
|
||||||
checkStatus(status, field->name(), format_name);
|
//
|
||||||
std::shared_ptr<arrow::Array> arrow_array;
|
//static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
|
||||||
status = array_builder->Finish(&arrow_array);
|
//{
|
||||||
checkStatus(status, field->name(), format_name);
|
// ColumnsWithTypeAndName sample_columns;
|
||||||
arrow::ArrayVector array_vector = {arrow_array};
|
// for (const auto & field : schema.fields())
|
||||||
auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
|
// {
|
||||||
std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
|
// /// Create an empty arrow column by its type and convert it to a ClickHouse column.
|
||||||
ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values);
|
// arrow::MemoryPool* pool = arrow::default_memory_pool();
|
||||||
sample_columns.emplace_back(std::move(sample_column));
|
// std::unique_ptr<arrow::ArrayBuilder> array_builder;
|
||||||
}
|
// arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
|
||||||
return Block(std::move(sample_columns));
|
// checkStatus(status, field->name(), format_name);
|
||||||
}
|
// std::shared_ptr<arrow::Array> arrow_array;
|
||||||
|
// status = array_builder->Finish(&arrow_array);
|
||||||
|
// checkStatus(status, field->name(), format_name);
|
||||||
|
// arrow::ArrayVector array_vector = {arrow_array};
|
||||||
|
// auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
|
||||||
|
// std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
|
||||||
|
// ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values);
|
||||||
|
// sample_columns.emplace_back(std::move(sample_column));
|
||||||
|
// }
|
||||||
|
// return Block(std::move(sample_columns));
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
//ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
||||||
|
// const arrow::Schema & schema, const std::string & format_name_, bool import_nested_)
|
||||||
|
// : header(arrowSchemaToCHHeader(schema, format_name_)), format_name(format_name_), import_nested(import_nested_)
|
||||||
|
//{
|
||||||
|
//}
|
||||||
|
|
||||||
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
||||||
const Block & header_, const std::string & format_name_, bool import_nested_)
|
const Block & header_, const std::string & format_name_, bool import_nested_)
|
||||||
@ -481,12 +518,6 @@ namespace DB
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
|
||||||
const arrow::Schema & schema, const std::string & format_name_, bool import_nested_)
|
|
||||||
: header(arrowSchemaToCHHeader(schema, format_name_)), format_name(format_name_), import_nested(import_nested_)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
|
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
|
||||||
{
|
{
|
||||||
Columns columns_list;
|
Columns columns_list;
|
||||||
@ -529,8 +560,7 @@ namespace DB
|
|||||||
|
|
||||||
// TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
|
// TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
|
||||||
if (!read_from_nested)
|
if (!read_from_nested)
|
||||||
throw Exception{
|
throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};
|
||||||
fmt::format("Column \"{}\" is not presented in input data.", header_column.name), ErrorCodes::THERE_IS_NO_COLUMN};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
|
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
|
||||||
@ -549,6 +579,7 @@ namespace DB
|
|||||||
|
|
||||||
res.setColumns(columns_list, num_rows);
|
res.setColumns(columns_list, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -22,9 +22,9 @@ class ArrowColumnToCHColumn
|
|||||||
public:
|
public:
|
||||||
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
|
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
|
||||||
|
|
||||||
/// Create header by arrow schema. It will be useful for inserting
|
/// Constructor that creates a header from the arrow schema. It will be useful for inserting
|
||||||
/// data from file without knowing table structure.
|
/// data from file without knowing table structure.
|
||||||
ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_);
|
/// ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_);
|
||||||
|
|
||||||
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
||||||
|
|
||||||
@ -32,6 +32,7 @@ private:
|
|||||||
const Block & header;
|
const Block & header;
|
||||||
const std::string format_name;
|
const std::string format_name;
|
||||||
bool import_nested;
|
bool import_nested;
|
||||||
|
|
||||||
/// Map {column name : dictionary column}.
|
/// Map {column name : dictionary column}.
|
||||||
/// To avoid converting dictionary from Arrow Dictionary
|
/// To avoid converting dictionary from Arrow Dictionary
|
||||||
/// to LowCardinality every chunk we save it and reuse.
|
/// to LowCardinality every chunk we save it and reuse.
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
M(INT64, arrow::Int64Type) \
|
M(INT64, arrow::Int64Type) \
|
||||||
M(FLOAT, arrow::FloatType) \
|
M(FLOAT, arrow::FloatType) \
|
||||||
M(DOUBLE, arrow::DoubleType) \
|
M(DOUBLE, arrow::DoubleType) \
|
||||||
M(STRING, arrow::StringType)
|
M(BINARY, arrow::BinaryType)
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
@ -315,15 +315,15 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not y
|
|||||||
1593604801 abc 42.125
|
1593604801 abc 42.125
|
||||||
1593604801 def 7.7
|
1593604801 def 7.7
|
||||||
=== Try load data from nonnullable.impala.parquet
|
=== Try load data from nonnullable.impala.parquet
|
||||||
8 [-1] [[-1,-2],[]] {'k1':-1} [{},{'k1':1},{},{}] (-1,[-1],([[(-1)]]),{})
|
8 [-1] [[-1,-2],[]] {'k1':-1} [{},{'k1':1},{},{}] (-1,[-1],([[(-1,'nonnullable')]]),{})
|
||||||
=== Try load data from nullable.impala.parquet
|
=== Try load data from nullable.impala.parquet
|
||||||
1 [1,2,3] [[1,2],[3,4]] {'k1':1,'k2':100} [{'k1':1}] (1,[1],([[(10),(-10)],[(11)]]),{'foo':(([1.1]))})
|
1 [1,2,3] [[1,2],[3,4]] {'k1':1,'k2':100} [{'k1':1}] (1,[1],([[(10,'aaa'),(-10,'bbb')],[(11,'c')]]),{'foo':(([1.1]))})
|
||||||
2 [NULL,1,2,NULL,3,NULL] [[NULL,1,2,NULL],[3,NULL,4],[],[]] {'k1':2,'k2':NULL} [{'k3':NULL,'k1':1},{},{}] (NULL,[NULL],([[(NULL),(10),(NULL),(-10),(NULL)],[(11),(NULL)],[],[]]),{'g1':(([2.2,NULL])),'g2':(([])),'g3':(([])),'g4':(([])),'g5':(([]))})
|
2 [NULL,1,2,NULL,3,NULL] [[NULL,1,2,NULL],[3,NULL,4],[],[]] {'k1':2,'k2':NULL} [{'k3':NULL,'k1':1},{},{}] (NULL,[NULL],([[(NULL,NULL),(10,'aaa'),(NULL,NULL),(-10,'bbb'),(NULL,NULL)],[(11,'c'),(NULL,NULL)],[],[]]),{'g1':(([2.2,NULL])),'g2':(([])),'g3':(([])),'g4':(([])),'g5':(([]))})
|
||||||
3 [] [[]] {} [{},{}] (NULL,[],([]),{})
|
3 [] [[]] {} [{},{}] (NULL,[],([]),{})
|
||||||
4 [] [] {} [] (NULL,[],([]),{})
|
4 [] [] {} [] (NULL,[],([]),{})
|
||||||
5 [] [] {} [] (NULL,[],([]),{'foo':(([2.2,3.3]))})
|
5 [] [] {} [] (NULL,[],([]),{'foo':(([2.2,3.3]))})
|
||||||
6 [] [] {} [] (NULL,[],([]),{})
|
6 [] [] {} [] (NULL,[],([]),{})
|
||||||
7 [] [[],[5,6]] {'k1':NULL,'k3':NULL} [] (7,[2,3,NULL],([[],[(NULL)],[]]),{})
|
7 [] [[],[5,6]] {'k1':NULL,'k3':NULL} [] (7,[2,3,NULL],([[],[(NULL,NULL)],[]]),{})
|
||||||
=== Try load data from nullable_list.parquet
|
=== Try load data from nullable_list.parquet
|
||||||
[1,NULL,2] [NULL,'Some string',NULL] [0.00,NULL,42.42]
|
[1,NULL,2] [NULL,'Some string',NULL] [0.00,NULL,42.42]
|
||||||
[NULL] [NULL] [NULL]
|
[NULL] [NULL] [NULL]
|
||||||
|
@ -1 +1 @@
|
|||||||
`ID` Nullable(Int64), `Int_Array` Array(Nullable(Int32)), `int_array_array` Array(Array(Nullable(Int32))), `Int_Map` Map(String, Nullable(Int32)), `int_map_array` Array(Map(String, Nullable(Int32))), `nested_Struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))
|
`ID` Nullable(Int64), `Int_Array` Array(Nullable(Int32)), `int_array_array` Array(Array(Nullable(Int32))), `Int_Map` Map(String, Nullable(Int32)), `int_map_array` Array(Map(String, Nullable(Int32))), `nested_Struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32), Nullable(String))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))
|
||||||
|
@ -1 +1 @@
|
|||||||
`id` Nullable(Int64), `int_array` Array(Nullable(Int32)), `int_array_Array` Array(Array(Nullable(Int32))), `int_map` Map(String, Nullable(Int32)), `int_Map_Array` Array(Map(String, Nullable(Int32))), `nested_struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))
|
`id` Nullable(Int64), `int_array` Array(Nullable(Int32)), `int_array_Array` Array(Array(Nullable(Int32))), `int_map` Map(String, Nullable(Int32)), `int_Map_Array` Array(Map(String, Nullable(Int32))), `nested_struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32), Nullable(String))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))
|
||||||
|
Loading…
Reference in New Issue
Block a user