mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Avro UUID support
This commit is contained in:
parent
bcf086e592
commit
fc1ae85600
2
contrib/avro
vendored
2
contrib/avro
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 6cfcf6c24293af100d523b89b61d1ab216fa4735
|
Subproject commit 92caca2d42fc9a97e34e95f963593539d32ed331
|
@ -23,6 +23,7 @@
|
|||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
#include <DataTypes/IDataType.h>
|
#include <DataTypes/IDataType.h>
|
||||||
#include <DataTypes/getLeastSupertype.h>
|
#include <DataTypes/getLeastSupertype.h>
|
||||||
|
|
||||||
@ -176,6 +177,19 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node
|
|||||||
{
|
{
|
||||||
case avro::AVRO_STRING: [[fallthrough]];
|
case avro::AVRO_STRING: [[fallthrough]];
|
||||||
case avro::AVRO_BYTES:
|
case avro::AVRO_BYTES:
|
||||||
|
if (target.isUUID())
|
||||||
|
{
|
||||||
|
return [tmp = std::string()](IColumn & column, avro::Decoder & decoder) mutable
|
||||||
|
{
|
||||||
|
decoder.decodeString(tmp);
|
||||||
|
if (tmp.length() != 36)
|
||||||
|
throw Exception(std::string("Cannot parse uuid ") + tmp, ErrorCodes::CANNOT_PARSE_UUID);
|
||||||
|
|
||||||
|
UUID uuid;
|
||||||
|
parseUUID(reinterpret_cast<const UInt8 *>(tmp.data()), std::reverse_iterator<UInt8 *>(reinterpret_cast<UInt8 *>(&uuid) + 16));
|
||||||
|
assert_cast<DataTypeUUID::ColumnType &>(column).insertValue(uuid);
|
||||||
|
};
|
||||||
|
}
|
||||||
if (target.isString() || target.isFixedString())
|
if (target.isString() || target.isFixedString())
|
||||||
{
|
{
|
||||||
return [tmp = std::string()](IColumn & column, avro::Decoder & decoder) mutable
|
return [tmp = std::string()](IColumn & column, avro::Decoder & decoder) mutable
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <DataTypes/DataTypeEnum.h>
|
#include <DataTypes/DataTypeEnum.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
|
|
||||||
#include <Columns/ColumnArray.h>
|
#include <Columns/ColumnArray.h>
|
||||||
#include <Columns/ColumnFixedString.h>
|
#include <Columns/ColumnFixedString.h>
|
||||||
@ -207,6 +208,18 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF
|
|||||||
encoder.encodeEnum(enum_mapping.at(enum_value));
|
encoder.encodeEnum(enum_mapping.at(enum_value));
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
case TypeIndex::UUID:
|
||||||
|
{
|
||||||
|
auto schema = avro::StringSchema();
|
||||||
|
schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::UUID));
|
||||||
|
return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
|
||||||
|
{
|
||||||
|
const auto & uuid = assert_cast<const DataTypeUUID::ColumnType &>(column).getElement(row_num);
|
||||||
|
std::array<UInt8, 36> s;
|
||||||
|
formatUUID(std::reverse_iterator<const UInt8 *>(reinterpret_cast<const UInt8 *>(&uuid) + 16), s.data());
|
||||||
|
encoder.encodeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
|
||||||
|
}};
|
||||||
|
}
|
||||||
case TypeIndex::Array:
|
case TypeIndex::Array:
|
||||||
{
|
{
|
||||||
const auto & array_type = assert_cast<const DataTypeArray &>(*data_type);
|
const auto & array_type = assert_cast<const DataTypeArray &>(*data_type);
|
||||||
|
@ -14,8 +14,8 @@
|
|||||||
"79cd909892d7e7ade1987cc7422628ba"
|
"79cd909892d7e7ade1987cc7422628ba"
|
||||||
"79cd909892d7e7ade1987cc7422628ba"
|
"79cd909892d7e7ade1987cc7422628ba"
|
||||||
= logical_types
|
= logical_types
|
||||||
"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000"
|
"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000","7c856fd6-005f-46c7-a7b5-3a082ef6c659"
|
||||||
18250,1578641516227,1578641516227000
|
18250,1578641516227,1578641516227000,"7c856fd6-005f-46c7-a7b5-3a082ef6c659"
|
||||||
= references
|
= references
|
||||||
"a1","c1"
|
"a1","c1"
|
||||||
"a2","c2"
|
"a2","c2"
|
||||||
@ -52,7 +52,7 @@ not found
|
|||||||
= complex
|
= complex
|
||||||
"A","t","['s1','s2']","[['a1'],['a2']]","s1",\N,"79cd909892d7e7ade1987cc7422628ba"
|
"A","t","['s1','s2']","[['a1'],['a2']]","s1",\N,"79cd909892d7e7ade1987cc7422628ba"
|
||||||
= logical_types
|
= logical_types
|
||||||
"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000"
|
"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000","7c856fd6-005f-46c7-a7b5-3a082ef6c659"
|
||||||
= other
|
= other
|
||||||
0
|
0
|
||||||
1000
|
1000
|
||||||
|
@ -21,8 +21,8 @@ cat $DATA_DIR/complex.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-fo
|
|||||||
cat $DATA_DIR/complex.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "g_fixed FixedString(32)" -q 'select * from table'
|
cat $DATA_DIR/complex.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "g_fixed FixedString(32)" -q 'select * from table'
|
||||||
|
|
||||||
echo = logical_types
|
echo = logical_types
|
||||||
cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC')" -q 'select * from table'
|
cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC'), d_uuid UUID" -q 'select * from table'
|
||||||
cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a_date Int32, b_timestamp_millis Int64, c_timestamp_micros Int64' -q 'select * from table'
|
cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a_date Int32, b_timestamp_millis Int64, c_timestamp_micros Int64, d_uuid UUID' -q 'select * from table'
|
||||||
|
|
||||||
echo = references
|
echo = references
|
||||||
cat $DATA_DIR/references.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a String, c String" -q 'select * from table'
|
cat $DATA_DIR/references.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a String, c String" -q 'select * from table'
|
||||||
@ -76,8 +76,8 @@ S2="a_enum_to_string String, b_enum_to_enum Enum('t' = 1, 'f' = 0), c_array_stri
|
|||||||
echo "\"A\",\"t\",\"['s1','s2']\",\"[['a1'],['a2']]\",\"s1\",\N,\"79cd909892d7e7ade1987cc7422628ba\"" | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S2" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S2" -q 'select * from table'
|
echo "\"A\",\"t\",\"['s1','s2']\",\"[['a1'],['a2']]\",\"s1\",\N,\"79cd909892d7e7ade1987cc7422628ba\"" | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S2" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S2" -q 'select * from table'
|
||||||
|
|
||||||
echo = logical_types
|
echo = logical_types
|
||||||
S3="a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC')"
|
S3="a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC'), d_uuid UUID"
|
||||||
echo '"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000"' | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S3" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S3" -q 'select * from table'
|
echo '"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000","7c856fd6-005f-46c7-a7b5-3a082ef6c659"' | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S3" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S3" -q 'select * from table'
|
||||||
|
|
||||||
echo = other
|
echo = other
|
||||||
S4="a Int64"
|
S4="a Int64"
|
||||||
|
Binary file not shown.
@ -4,6 +4,7 @@
|
|||||||
"fields": [
|
"fields": [
|
||||||
{"name": "a_date", "type": { "type": "int", "logicalType": "date"}},
|
{"name": "a_date", "type": { "type": "int", "logicalType": "date"}},
|
||||||
{"name": "b_timestamp_millis", "type": { "type": "long", "logicalType": "timestamp-millis"}},
|
{"name": "b_timestamp_millis", "type": { "type": "long", "logicalType": "timestamp-millis"}},
|
||||||
{"name": "c_timestamp_micros", "type": { "type": "long", "logicalType": "timestamp-micros"}}
|
{"name": "c_timestamp_micros", "type": { "type": "long", "logicalType": "timestamp-micros"}},
|
||||||
|
{"name": "d_uuid", "type": { "type": "string", "logicalType": "uuid"}}
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -1 +1 @@
|
|||||||
{"a_date":18250,"b_timestamp_millis":1578641516227,"c_timestamp_micros":1578641516227000}
|
{"a_date":18250,"b_timestamp_millis":1578641516227,"c_timestamp_micros":1578641516227000, "d_uuid":"7c856fd6-005f-46c7-a7b5-3a082ef6c659"}
|
||||||
|
Loading…
Reference in New Issue
Block a user