Avro input format: support Union with a single type

This commit is contained in:
flynn 2023-07-15 16:21:58 +00:00
parent 9df928eb13
commit 386adfad33
4 changed files with 41 additions and 5 deletions

View File

@ -367,14 +367,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
break;
case avro::AVRO_UNION:
{
if (root_node->leaves() == 2
if (root_node->leaves() == 1)
{
auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type);
return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
decoder.decodeUnionIndex();
nested_deserialize(column, decoder);
return true;
};
}
/// FIXME: Support UNIONs that have more than two datatypes.
else if (
root_node->leaves() == 2
&& (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL))
{
int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0;
if (target.isNullable())
{
auto nested_deserialize = this->createDeserializeFn(
root_node->leafAt(non_null_union_index), removeNullable(target_type));
auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
@ -393,7 +404,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
}
else if (null_as_default)
{
auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
int union_index = static_cast<int>(decoder.decodeUnionIndex());
@ -1169,12 +1180,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
case avro::Type::AVRO_NULL:
return std::make_shared<DataTypeNothing>();
case avro::Type::AVRO_UNION:
if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
if (node->leaves() == 1)
{
return avroNodeToDataType(node->leafAt(0));
}
else if (
node->leaves() == 2
&& (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
{
int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index));
return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type;
}
/// FIXME: Support UNIONs that have more than two datatypes.
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting.");
case avro::Type::AVRO_SYMBOLIC:
return avroNodeToDataType(avro::resolveSymbol(node));

View File

@ -0,0 +1,5 @@
name String
favorite_number Int32
favorite_color String
Alyssa 256 yellow
Ben 7 red

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest

# Runs two queries against the Avro fixture union_one_type.avro: first a
# `desc` of the file, then a `select *` over it. Output goes to stdout.

# Stop at the first command that exits with a non-zero status.
set -e

# Absolute path of the directory that contains this script.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# Directory holding the Avro fixtures, and the single file exercised here.
DATA_DIR=$CUR_DIR/data_avro
avro_file="$DATA_DIR/union_one_type.avro"

# CLICKHOUSE_LOCAL is not assigned in this script (presumably it comes from
# shell_config.sh - confirm there). It is expanded unquoted, exactly as before.
for query in "desc file('$avro_file')" "select * from file('$avro_file')"
do
    $CLICKHOUSE_LOCAL -q "$query"
done