Resolve conflicts, fix build and tests

This commit is contained in:
Pavel Kruglov 2021-08-05 18:09:48 +03:00
parent ee167e9ee8
commit 607d4dcc0b
6 changed files with 477 additions and 445 deletions

View File

@ -2,6 +2,7 @@
#include "ArrowColumnToCHColumn.h" #include "ArrowColumnToCHColumn.h"
#if USE_ARROW || USE_ORC || USE_PARQUET #if USE_ARROW || USE_ORC || USE_PARQUET
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesDecimal.h> #include <DataTypes/DataTypesDecimal.h>
@ -10,7 +11,8 @@
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h> #include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/NestedUtils.h> #include <DataTypes/NestedUtils.h>
#include <common/DateLUTImpl.h> #include <common/DateLUTImpl.h>
#include <common/types.h> #include <common/types.h>
@ -23,15 +25,39 @@
#include <Columns/ColumnUnique.h> #include <Columns/ColumnUnique.h>
#include <Columns/ColumnMap.h> #include <Columns/ColumnMap.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Processors/Chunk.h>
#include <Interpreters/castColumn.h> #include <Interpreters/castColumn.h>
#include <algorithm> #include <algorithm>
#include <fmt/format.h>
#include <arrow/builder.h> #include <arrow/builder.h>
#include <arrow/array.h> #include <arrow/array.h>
/// Applies M(arrow_type_id, db_type) once for every Arrow numeric type id listed below,
/// pairing each id with the DB numeric type given next to it.
/// NOTE(review): HALF_FLOAT is paired with DB::Float32, the same DB type as FLOAT — confirm
/// that users of this list do not assume the Arrow and DB element widths are equal.
#define FOR_ARROW_NUMERIC_TYPES(M) \
M(arrow::Type::UINT8, DB::UInt8) \
M(arrow::Type::INT8, DB::Int8) \
M(arrow::Type::UINT16, DB::UInt16) \
M(arrow::Type::INT16, DB::Int16) \
M(arrow::Type::UINT32, DB::UInt32) \
M(arrow::Type::INT32, DB::Int32) \
M(arrow::Type::UINT64, DB::UInt64) \
M(arrow::Type::INT64, DB::Int64) \
M(arrow::Type::HALF_FLOAT, DB::Float32) \
M(arrow::Type::FLOAT, DB::Float32) \
M(arrow::Type::DOUBLE, DB::Float64)
/// Applies M(arrow_type_id, db_type) once for every Arrow integer type id listed below.
/// Signed Arrow types are paired with the unsigned DB type of the same width.
/// NOTE(review): presumably used for dictionary index columns — confirm at the expansion sites.
#define FOR_ARROW_INDEXES_TYPES(M) \
M(arrow::Type::UINT8, DB::UInt8) \
M(arrow::Type::INT8, DB::UInt8) \
M(arrow::Type::UINT16, DB::UInt16) \
M(arrow::Type::INT16, DB::UInt16) \
M(arrow::Type::UINT32, DB::UInt32) \
M(arrow::Type::INT32, DB::UInt32) \
M(arrow::Type::UINT64, DB::UInt64) \
M(arrow::Type::INT64, DB::UInt64)
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_TYPE; extern const int UNKNOWN_TYPE;
@ -43,11 +69,6 @@ namespace DB
extern const int UNKNOWN_EXCEPTION; extern const int UNKNOWN_EXCEPTION;
} }
/// Throws an Exception with code UNKNOWN_EXCEPTION when `status` is not ok.
/// The message names the format, the column and the textual form of the status.
static void checkStatus(const arrow::Status & status, const String & column_name, const String & format_name)
{
    /// Nothing to report for a successful status.
    if (status.ok())
        return;

    const String message = fmt::format("Error with a {} column \"{}\": {}.", format_name, column_name, status.ToString());
    throw Exception{message, ErrorCodes::UNKNOWN_EXCEPTION};
}
/// Inserts numeric data right into internal column data to reduce an overhead /// Inserts numeric data right into internal column data to reduce an overhead
template <typename NumericType, typename VectorType = ColumnVector<NumericType>> template <typename NumericType, typename VectorType = ColumnVector<NumericType>>
@ -83,7 +104,7 @@ namespace DB
size_t chars_t_size = 0; size_t chars_t_size = 0;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::BinaryArray & chunk = dynamic_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
const size_t chunk_length = chunk.length(); const size_t chunk_length = chunk.length();
if (chunk_length > 0) if (chunk_length > 0)
@ -98,7 +119,7 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::BinaryArray & chunk = dynamic_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data(); std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
const size_t chunk_length = chunk.length(); const size_t chunk_length = chunk.length();
@ -126,7 +147,7 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i))); arrow::BooleanArray & chunk = dynamic_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
/// buffers[0] is a null bitmap and buffers[1] are actual values /// buffers[0] is a null bitmap and buffers[1] are actual values
std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1]; std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1];
@ -138,14 +159,14 @@ namespace DB
static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name) static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name)
{ {
auto internal_type = std::make_shared<DataTypeUInt16>(); auto internal_type = std::make_shared<DataTypeDate32>();
auto internal_column = internal_type->createColumn(); auto internal_column = internal_type->createColumn();
PaddedPODArray<UInt16> & column_data = assert_cast<ColumnVector<UInt16> &>(*internal_column).getData(); PaddedPODArray<Int32> & column_data = assert_cast<ColumnVector<Int32> &>(*internal_column).getData();
column_data.reserve(arrow_column->length()); column_data.reserve(arrow_column->length());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i))); arrow::Date32Array & chunk = dynamic_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
@ -155,8 +176,8 @@ namespace DB
// TODO: will it rollback correctly? // TODO: will it rollback correctly?
throw Exception throw Exception
{ {
fmt::format("Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM), ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM,
}; };
} }
@ -176,7 +197,7 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
auto & chunk = static_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i))); auto & chunk = dynamic_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
@ -195,7 +216,7 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
auto & chunk = static_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i))); auto & chunk = dynamic_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type()); const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type());
UInt32 divide = 1; UInt32 divide = 1;
@ -237,7 +258,7 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
auto & chunk = static_cast<DecimalArray &>(*(arrow_column->chunk(chunk_i))); auto & chunk = dynamic_cast<DecimalArray &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast<const DecimalType *>(chunk.Value(value_i))); // TODO: copy column column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast<const DecimalType *>(chunk.Value(value_i))); // TODO: copy column
@ -271,9 +292,9 @@ namespace DB
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i))); arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
auto arrow_offsets_array = list_chunk.offsets(); auto arrow_offsets_array = list_chunk.offsets();
auto & arrow_offsets = static_cast<arrow::Int32Array &>(*arrow_offsets_array); auto & arrow_offsets = dynamic_cast<arrow::Int32Array &>(*arrow_offsets_array);
auto start = offsets_data.back(); auto start = offsets_data.back();
for (int64_t i = 1; i < arrow_offsets.length(); ++i) for (int64_t i = 1; i < arrow_offsets.length(); ++i)
offsets_data.emplace_back(start + arrow_offsets.Value(i)); offsets_data.emplace_back(start + arrow_offsets.Value(i));
@ -303,7 +324,7 @@ namespace DB
array_vector.reserve(arrow_column->num_chunks()); array_vector.reserve(arrow_column->num_chunks());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i))); arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
std::shared_ptr<arrow::Array> chunk = list_chunk.values(); std::shared_ptr<arrow::Array> chunk = list_chunk.values();
array_vector.emplace_back(std::move(chunk)); array_vector.emplace_back(std::move(chunk));
} }
@ -374,11 +395,11 @@ namespace DB
case arrow::Type::STRUCT: case arrow::Type::STRUCT:
{ {
auto arrow_type = arrow_column->type(); auto arrow_type = arrow_column->type();
auto arrow_struct_type = assert_cast<arrow::StructType *>(arrow_type.get()); auto * arrow_struct_type = assert_cast<arrow::StructType *>(arrow_type.get());
std::vector<arrow::ArrayVector> nested_arrow_columns(arrow_struct_type->num_fields()); std::vector<arrow::ArrayVector> nested_arrow_columns(arrow_struct_type->num_fields());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::StructArray & struct_chunk = static_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i))); arrow::StructArray & struct_chunk = dynamic_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
for (int i = 0; i < arrow_struct_type->num_fields(); ++i) for (int i = 0; i < arrow_struct_type->num_fields(); ++i)
nested_arrow_columns[i].emplace_back(struct_chunk.field(i)); nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
} }
@ -409,7 +430,7 @@ namespace DB
arrow::ArrayVector dict_array; arrow::ArrayVector dict_array;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::DictionaryArray & dict_chunk = dynamic_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
dict_array.emplace_back(dict_chunk.dictionary()); dict_array.emplace_back(dict_chunk.dictionary());
} }
auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array); auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
@ -426,7 +447,7 @@ namespace DB
arrow::ArrayVector indexes_array; arrow::ArrayVector indexes_array;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::DictionaryArray & dict_chunk = dynamic_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
indexes_array.emplace_back(dict_chunk.indices()); indexes_array.emplace_back(dict_chunk.indices());
} }
@ -447,33 +468,49 @@ namespace DB
default: default:
throw Exception throw Exception
{ {
fmt::format(R"(Unsupported {} type "{}" of an input column "{}".)", format_name, arrow_column->type()->name(), column_name), ErrorCodes::UNKNOWN_TYPE,
ErrorCodes::UNKNOWN_TYPE "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name,
}; };
} }
} }
static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
{ // Creating CH header by arrow schema. Will be useful in task about inserting
ColumnsWithTypeAndName sample_columns; // data from file without knowing table structure.
for (const auto & field : schema.fields()) //
{ //static void checkStatus(const arrow::Status & status, const String & column_name, const String & format_name)
/// Create empty arrow column by its type and convert it to ClickHouse column. //{
arrow::MemoryPool* pool = arrow::default_memory_pool(); // if (!status.ok())
std::unique_ptr<arrow::ArrayBuilder> array_builder; // throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); //}
checkStatus(status, field->name(), format_name); //
std::shared_ptr<arrow::Array> arrow_array; //static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
status = array_builder->Finish(&arrow_array); //{
checkStatus(status, field->name(), format_name); // ColumnsWithTypeAndName sample_columns;
arrow::ArrayVector array_vector = {arrow_array}; // for (const auto & field : schema.fields())
auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector); // {
std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values; // /// Create empty arrow column by it's type and convert it to ClickHouse column.
ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values); // arrow::MemoryPool* pool = arrow::default_memory_pool();
sample_columns.emplace_back(std::move(sample_column)); // std::unique_ptr<arrow::ArrayBuilder> array_builder;
} // arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
return Block(std::move(sample_columns)); // checkStatus(status, field->name(), format_name);
} // std::shared_ptr<arrow::Array> arrow_array;
// status = array_builder->Finish(&arrow_array);
// checkStatus(status, field->name(), format_name);
// arrow::ArrayVector array_vector = {arrow_array};
// auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
// std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
// ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values);
// sample_columns.emplace_back(std::move(sample_column));
// }
// return Block(std::move(sample_columns));
//}
//
//ArrowColumnToCHColumn::ArrowColumnToCHColumn(
// const arrow::Schema & schema, const std::string & format_name_, bool import_nested_)
// : header(arrowSchemaToCHHeader(schema, format_name_)), format_name(format_name_), import_nested(import_nested_)
//{
//}
ArrowColumnToCHColumn::ArrowColumnToCHColumn( ArrowColumnToCHColumn::ArrowColumnToCHColumn(
const Block & header_, const std::string & format_name_, bool import_nested_) const Block & header_, const std::string & format_name_, bool import_nested_)
@ -481,12 +518,6 @@ namespace DB
{ {
} }
/// Builds the converter from an Arrow schema: the header is taken from
/// arrowSchemaToCHHeader(schema, format_name_); format_name_ and import_nested_ are stored as given.
/// NOTE(review): the class declares `header` as `const Block &`, while arrowSchemaToCHHeader
/// returns a Block by value, so this initializer binds a reference member to a temporary —
/// confirm the lifetime of the header before relying on this constructor.
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
const arrow::Schema & schema, const std::string & format_name_, bool import_nested_)
: header(arrowSchemaToCHHeader(schema, format_name_)), format_name(format_name_), import_nested(import_nested_)
{
}
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table) void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
{ {
Columns columns_list; Columns columns_list;
@ -529,8 +560,7 @@ namespace DB
// TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable? // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
if (!read_from_nested) if (!read_from_nested)
throw Exception{ throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};
fmt::format("Column \"{}\" is not presented in input data.", header_column.name), ErrorCodes::THERE_IS_NO_COLUMN};
} }
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name]; std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
@ -549,6 +579,7 @@ namespace DB
res.setColumns(columns_list, num_rows); res.setColumns(columns_list, num_rows);
} }
} }
#endif #endif

View File

@ -22,9 +22,9 @@ class ArrowColumnToCHColumn
public: public:
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_); ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
/// Create header by arrow schema. It will be useful for inserting /// Constructor that creates header by arrow schema. It will be useful for inserting
/// data from file without knowing table structure. /// data from file without knowing table structure.
ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_); /// ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_);
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table); void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
@ -32,6 +32,7 @@ private:
const Block & header; const Block & header;
const std::string format_name; const std::string format_name;
bool import_nested; bool import_nested;
/// Map {column name : dictionary column}. /// Map {column name : dictionary column}.
/// To avoid converting dictionary from Arrow Dictionary /// To avoid converting dictionary from Arrow Dictionary
/// to LowCardinality every chunk we save it and reuse. /// to LowCardinality every chunk we save it and reuse.

View File

@ -46,7 +46,7 @@
M(INT64, arrow::Int64Type) \ M(INT64, arrow::Int64Type) \
M(FLOAT, arrow::FloatType) \ M(FLOAT, arrow::FloatType) \
M(DOUBLE, arrow::DoubleType) \ M(DOUBLE, arrow::DoubleType) \
M(STRING, arrow::StringType) M(BINARY, arrow::BinaryType)
namespace DB namespace DB
{ {

View File

@ -315,15 +315,15 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not y
1593604801 abc 42.125 1593604801 abc 42.125
1593604801 def 7.7 1593604801 def 7.7
=== Try load data from nonnullable.impala.parquet === Try load data from nonnullable.impala.parquet
8 [-1] [[-1,-2],[]] {'k1':-1} [{},{'k1':1},{},{}] (-1,[-1],([[(-1)]]),{}) 8 [-1] [[-1,-2],[]] {'k1':-1} [{},{'k1':1},{},{}] (-1,[-1],([[(-1,'nonnullable')]]),{})
=== Try load data from nullable.impala.parquet === Try load data from nullable.impala.parquet
1 [1,2,3] [[1,2],[3,4]] {'k1':1,'k2':100} [{'k1':1}] (1,[1],([[(10),(-10)],[(11)]]),{'foo':(([1.1]))}) 1 [1,2,3] [[1,2],[3,4]] {'k1':1,'k2':100} [{'k1':1}] (1,[1],([[(10,'aaa'),(-10,'bbb')],[(11,'c')]]),{'foo':(([1.1]))})
2 [NULL,1,2,NULL,3,NULL] [[NULL,1,2,NULL],[3,NULL,4],[],[]] {'k1':2,'k2':NULL} [{'k3':NULL,'k1':1},{},{}] (NULL,[NULL],([[(NULL),(10),(NULL),(-10),(NULL)],[(11),(NULL)],[],[]]),{'g1':(([2.2,NULL])),'g2':(([])),'g3':(([])),'g4':(([])),'g5':(([]))}) 2 [NULL,1,2,NULL,3,NULL] [[NULL,1,2,NULL],[3,NULL,4],[],[]] {'k1':2,'k2':NULL} [{'k3':NULL,'k1':1},{},{}] (NULL,[NULL],([[(NULL,NULL),(10,'aaa'),(NULL,NULL),(-10,'bbb'),(NULL,NULL)],[(11,'c'),(NULL,NULL)],[],[]]),{'g1':(([2.2,NULL])),'g2':(([])),'g3':(([])),'g4':(([])),'g5':(([]))})
3 [] [[]] {} [{},{}] (NULL,[],([]),{}) 3 [] [[]] {} [{},{}] (NULL,[],([]),{})
4 [] [] {} [] (NULL,[],([]),{}) 4 [] [] {} [] (NULL,[],([]),{})
5 [] [] {} [] (NULL,[],([]),{'foo':(([2.2,3.3]))}) 5 [] [] {} [] (NULL,[],([]),{'foo':(([2.2,3.3]))})
6 [] [] {} [] (NULL,[],([]),{}) 6 [] [] {} [] (NULL,[],([]),{})
7 [] [[],[5,6]] {'k1':NULL,'k3':NULL} [] (7,[2,3,NULL],([[],[(NULL)],[]]),{}) 7 [] [[],[5,6]] {'k1':NULL,'k3':NULL} [] (7,[2,3,NULL],([[],[(NULL,NULL)],[]]),{})
=== Try load data from nullable_list.parquet === Try load data from nullable_list.parquet
[1,NULL,2] [NULL,'Some string',NULL] [0.00,NULL,42.42] [1,NULL,2] [NULL,'Some string',NULL] [0.00,NULL,42.42]
[NULL] [NULL] [NULL] [NULL] [NULL] [NULL]

View File

@ -1 +1 @@
`ID` Nullable(Int64), `Int_Array` Array(Nullable(Int32)), `int_array_array` Array(Array(Nullable(Int32))), `Int_Map` Map(String, Nullable(Int32)), `int_map_array` Array(Map(String, Nullable(Int32))), `nested_Struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32))))), Map(String, Tuple(Tuple(Array(Nullable(Float64)))))) `ID` Nullable(Int64), `Int_Array` Array(Nullable(Int32)), `int_array_array` Array(Array(Nullable(Int32))), `Int_Map` Map(String, Nullable(Int32)), `int_map_array` Array(Map(String, Nullable(Int32))), `nested_Struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32), Nullable(String))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))

View File

@ -1 +1 @@
`id` Nullable(Int64), `int_array` Array(Nullable(Int32)), `int_array_Array` Array(Array(Nullable(Int32))), `int_map` Map(String, Nullable(Int32)), `int_Map_Array` Array(Map(String, Nullable(Int32))), `nested_struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32))))), Map(String, Tuple(Tuple(Array(Nullable(Float64)))))) `id` Nullable(Int64), `int_array` Array(Nullable(Int32)), `int_array_Array` Array(Array(Nullable(Int32))), `int_map` Map(String, Nullable(Int32)), `int_Map_Array` Array(Map(String, Nullable(Int32))), `nested_struct` Tuple(Nullable(Int32), Array(Nullable(Int32)), Tuple(Array(Array(Tuple(Nullable(Int32), Nullable(String))))), Map(String, Tuple(Tuple(Array(Nullable(Float64))))))