Flatten list type arrow chunks on parsing

This commit is contained in:
Arthur Passos 2022-11-16 12:27:01 -03:00
parent 00554ecd00
commit e1236340b5

View File

@ -329,9 +329,8 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedAr
arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
auto arrow_offsets_array = list_chunk.offsets();
auto & arrow_offsets = dynamic_cast<arrow::Int32Array &>(*arrow_offsets_array);
auto start = offsets_data.back();
for (int64_t i = 1; i < arrow_offsets.length(); ++i)
offsets_data.emplace_back(start + arrow_offsets.Value(i));
offsets_data.emplace_back(arrow_offsets.Value(i));
}
return offsets_column;
}
@ -467,8 +466,12 @@ static std::shared_ptr<arrow::ChunkedArray> getNestedArrowColumn(std::shared_ptr
for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i)
{
arrow::ListArray & list_chunk = dynamic_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
std::shared_ptr<arrow::Array> chunk = list_chunk.values();
array_vector.emplace_back(std::move(chunk));
auto flatten_result = list_chunk.Flatten();
if (flatten_result.ok()) {
array_vector.emplace_back(flatten_result.ValueOrDie());
} else {
throw Exception(ErrorCodes::INCORRECT_DATA, "Failed to flatten chunk '{}' of column of type '{}' ", chunk_i, arrow_column->type()->id());
}
}
return std::make_shared<arrow::ChunkedArray>(array_vector);
}