Merge pull request #52137 from ucasfl/avro-fix

Avro input format: support Union with a single type
This commit is contained in:
robot-ch-test-poll1 2023-07-25 04:48:39 +02:00 committed by GitHub
commit 05d4932842
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 5 deletions

View File

@ -369,14 +369,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
break;
case avro::AVRO_UNION:
{
if (root_node->leaves() == 2
if (root_node->leaves() == 1)
{
auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type);
return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
decoder.decodeUnionIndex();
nested_deserialize(column, decoder);
return true;
};
}
/// FIXME: Support UNION with more than two datatypes.
else if (
root_node->leaves() == 2
&& (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL))
{
int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0;
if (target.isNullable())
{
auto nested_deserialize = this->createDeserializeFn(
root_node->leafAt(non_null_union_index), removeNullable(target_type));
auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
@ -395,7 +406,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
}
else if (null_as_default)
{
auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
int union_index = static_cast<int>(decoder.decodeUnionIndex());
@ -1192,12 +1203,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
case avro::Type::AVRO_NULL:
return std::make_shared<DataTypeNothing>();
case avro::Type::AVRO_UNION:
if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
if (node->leaves() == 1)
{
return avroNodeToDataType(node->leafAt(0));
}
else if (
node->leaves() == 2
&& (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
{
int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index));
return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type;
}
/// FIXME: Support UNION with more than two datatypes.
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting.");
case avro::Type::AVRO_SYMBOLIC:
return avroNodeToDataType(avro::resolveSymbol(node));

View File

@ -0,0 +1,5 @@
name String
favorite_number Int32
favorite_color String
Alyssa 256 yellow
Ben 7 red

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest

# Runs two queries against data_avro/union_one_type.avro with clickhouse-local:
# first a `desc` of the file, then a full `select *`. The combined stdout is
# what the test compares against its reference output.

# Abort on the first failing command so a broken query fails the test.
set -e

# Absolute path of the directory containing this script.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck source=../shell_config.sh
. "${CUR_DIR}/../shell_config.sh"

DATA_DIR="${CUR_DIR}/data_avro"
AVRO_FILE="${DATA_DIR}/union_one_type.avro"

# NOTE(review): CLICKHOUSE_LOCAL is presumably defined by shell_config.sh; it is
# deliberately left unquoted, exactly as before, so any arguments embedded in it
# are still word-split.
for query in "desc file('${AVRO_FILE}')" "select * from file('${AVRO_FILE}')"
do
    $CLICKHOUSE_LOCAL -q "$query"
done