Support structs in Arrow/Parquet/ORC

Author: Pavel Kruglov, 2021-05-20 16:47:12 +03:00
Parent: be11ff0820
Commit: 235e3e2f5b
13 changed files with 237 additions and 21 deletions
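The tests added in this commit round-trip Tuple columns through each format; as a quick illustration, a minimal Parquet round trip in the same style might look like the sketch below (the table name tuples_example and the sample value are illustrative, not part of the commit, and the test shell environment from shell_config.sh is assumed to be sourced):

${CLICKHOUSE_CLIENT} --query="CREATE TABLE tuples_example (t Tuple(UInt32, String)) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO tuples_example VALUES ((1, 'one'))"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM tuples_example FORMAT Parquet" > "${CLICKHOUSE_TMP}"/tuples_example.parquet
cat "${CLICKHOUSE_TMP}"/tuples_example.parquet | ${CLICKHOUSE_CLIENT} -q "INSERT INTO tuples_example FORMAT Parquet"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM tuples_example"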


@@ -7,15 +7,17 @@
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h>
#include <common/DateLUTImpl.h>
#include <common/types.h>
#include <Core/Block.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Interpreters/castColumn.h>
#include <algorithm>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
@@ -309,8 +311,6 @@ namespace DB
break;
case arrow::Type::LIST:
{
const auto * list_type = static_cast<arrow::ListType *>(arrow_column->type().get());
auto list_nested_type = list_type->value_type();
arrow::ArrayVector array_vector;
array_vector.reserve(arrow_column->num_chunks());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
@@ -326,6 +326,25 @@ namespace DB
fillOffsetsFromArrowListColumn(arrow_column, column_array.getOffsetsColumn());
break;
}
case arrow::Type::STRUCT:
{
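/// Gather each struct field's chunks into its own arrow::ChunkedArray, then read every field recursively into the matching element of the Tuple column.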
ColumnTuple & column_tuple = typeid_cast<ColumnTuple &>(internal_column);
int fields_count = column_tuple.tupleSize();
std::vector<arrow::ArrayVector> nested_arrow_columns(fields_count);
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{
arrow::StructArray & struct_chunk = static_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
for (int i = 0; i < fields_count; ++i)
nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
}
for (int i = 0; i != fields_count; ++i)
{
auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
readColumnFromArrowColumn(nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false);
}
break;
}
# define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \
case ARROW_NUMERIC_TYPE: \
fillColumnWithNumericData<CPP_NUMERIC_TYPE>(arrow_column, internal_column); \
@@ -372,6 +391,29 @@ namespace DB
return std::make_shared<DataTypeArray>(getInternalType(list_nested_type, array_type->getNestedType(), column_name, format_name));
}
if (arrow_type->id() == arrow::Type::STRUCT)
{
const auto * struct_type = static_cast<arrow::StructType *>(arrow_type.get());
const DataTypeTuple * tuple_type = typeid_cast<const DataTypeTuple *>(column_type.get());
if (!tuple_type)
throw Exception{"Cannot convert arrow STRUCT type to a not Tuple ClickHouse type " + column_type->getName(), ErrorCodes::CANNOT_CONVERT_TYPE};
const DataTypes & tuple_nested_types = tuple_type->getElements();
int internal_fields_num = tuple_nested_types.size();
/// If the internal column has fewer elements than the Arrow struct, we select only the first internal_fields_num columns.
if (internal_fields_num > struct_type->num_fields())
throw Exception{
"Cannot convert arrow STRUCT with " + std::to_string(struct_type->num_fields()) + " fields to a ClickHouse Tuple with "
+ std::to_string(internal_fields_num) + " elements " + column_type->getName(),
ErrorCodes::CANNOT_CONVERT_TYPE};
DataTypes nested_types;
for (int i = 0; i < internal_fields_num; ++i)
nested_types.push_back(getInternalType(struct_type->field(i)->type(), tuple_nested_types[i], column_name, format_name));
return std::make_shared<DataTypeTuple>(std::move(nested_types));
}
if (const auto * internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(),
[=](auto && elem) { return elem.first == arrow_type->id(); });
internal_type_it != arrow_type_to_internal_type.end())


@@ -6,11 +6,13 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Core/callOnTypeIndex.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <Processors/Formats/IOutputFormat.h>
#include <arrow/api.h>
#include <arrow/builder.h>
@@ -113,7 +115,7 @@ namespace DB
size_t start,
size_t end)
{
const auto * column_array = static_cast<const ColumnArray *>(column.get());
const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
ColumnPtr nested_column = column_array->getDataPtr();
DataTypePtr nested_type = typeid_cast<const DataTypeArray *>(column_type.get())->getNestedType();
const auto & offsets = column_array->getOffsets();
@@ -124,13 +126,41 @@ namespace DB
for (size_t array_idx = start; array_idx < end; ++array_idx)
{
/// Start new array
/// Start new array.
components_status = builder.Append();
checkStatus(components_status, nested_column->getName(), format_name);
fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx]);
}
}
static void fillArrowArrayWithTupleColumnData(
const String & column_name,
ColumnPtr & column,
const std::shared_ptr<const IDataType> & column_type,
const PaddedPODArray<UInt8> * null_bytemap,
arrow::ArrayBuilder * array_builder,
String format_name,
size_t start,
size_t end)
{
const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get());
const auto & nested_types = typeid_cast<const DataTypeTuple *>(column_type.get())->getElements();
arrow::StructBuilder & builder = assert_cast<arrow::StructBuilder &>(*array_builder);
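/// First fill each field builder from the corresponding nested column, then append one struct slot per row to mark it as present.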
for (size_t i = 0; i != column_tuple->tupleSize(); ++i)
{
ColumnPtr nested_column = column_tuple->getColumnPtr(i);
fillArrowArray(column_name + "." + std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end);
}
for (size_t i = start; i != end; ++i)
{
auto status = builder.Append();
checkStatus(status, column->getName(), format_name);
}
}
template <typename ColumnType>
static void fillArrowArrayWithStringColumnData(
ColumnPtr write_column,
@@ -251,6 +281,10 @@ namespace DB
{
fillArrowArrayWithArrayColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end);
}
else if ("Tuple" == column_type_name)
{
fillArrowArrayWithTupleColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end);
}
else if (isDecimal(column_type))
{
auto fill_decimal = [&](const auto & types) -> bool
@@ -351,6 +385,19 @@ namespace DB
return arrow::list(nested_arrow_type);
}
if (isTuple(column_type))
{
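/// A Tuple is represented as an Arrow struct whose fields are named <column_name>.<element_index>.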
const auto & nested_types = typeid_cast<const DataTypeTuple *>(column_type.get())->getElements();
std::vector<std::shared_ptr<arrow::Field>> nested_fields;
for (size_t i = 0; i != nested_types.size(); ++i)
{
String name = column_name + "." + std::to_string(i);
auto nested_arrow_type = getArrowType(nested_types[i], name, format_name, is_column_nullable);
nested_fields.push_back(std::make_shared<arrow::Field>(name, nested_arrow_type, *is_column_nullable));
}
return arrow::struct_(std::move(nested_fields));
}
const std::string type_name = column_type->getFamilyName();
if (const auto * arrow_type_it = std::find_if(
internal_type_to_arrow_type.begin(),


@@ -67,7 +67,7 @@ void ORCBlockInputFormat::resetParser()
stripe_current = 0;
}
size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
{
if (type->id() == arrow::Type::LIST)
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type()) + 1;


@@ -10,12 +10,14 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
namespace DB
{
@@ -46,15 +48,9 @@ void ORCOutputStream::write(const void* buf, size_t length)
ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes())
{
schema = orc::createStructType();
options.setCompression(orc::CompressionKind::CompressionKind_NONE);
size_t columns_count = header_.columns();
for (size_t i = 0; i != columns_count; ++i)
schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i]));
writer = orc::createWriter(*schema, &output_stream, options);
}
ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & type)
ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & type, const std::string & column_name)
{
switch (type->getTypeId())
{
@@ -102,12 +98,12 @@ ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & t
}
case TypeIndex::Nullable:
{
return getORCType(removeNullable(type));
return getORCType(removeNullable(type), column_name);
}
case TypeIndex::Array:
{
const auto * array_type = typeid_cast<const DataTypeArray *>(type.get());
return orc::createListType(getORCType(array_type->getNestedType()));
return orc::createListType(getORCType(array_type->getNestedType(), column_name));
}
case TypeIndex::Decimal32:
{
@@ -124,6 +120,18 @@ ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & t
const auto * decimal_type = typeid_cast<const DataTypeDecimal<Decimal128> *>(type.get());
return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale());
}
case TypeIndex::Tuple:
{
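/// A Tuple maps to an ORC struct; nested fields are named <column_name>.<element_index>.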
const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get());
const auto & nested_types = tuple_type->getElements();
auto struct_type = orc::createStructType();
for (size_t i = 0; i < nested_types.size(); ++i)
{
String name = column_name + "." + std::to_string(i);
struct_type->addStructField(name, getORCType(nested_types[i], name));
}
return struct_type;
}
default:
{
throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN);
@@ -149,6 +157,8 @@ void ORCBlockOutputFormat::writeNumbers(
number_orc_column.notNull[i] = 0;
continue;
}
else
number_orc_column.notNull[i] = 1;
number_orc_column.data[i] = convert(number_column.getElement(i));
}
number_orc_column.numElements = number_column.size();
@@ -175,6 +185,9 @@ void ORCBlockOutputFormat::writeDecimals(
decimal_orc_column.notNull[i] = 0;
continue;
}
else
decimal_orc_column.notNull[i] = 1;
decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value);
}
decimal_orc_column.numElements = decimal_column.size();
@@ -197,6 +210,9 @@ void ORCBlockOutputFormat::writeStrings(
string_orc_column.notNull[i] = 0;
continue;
}
else
string_orc_column.notNull[i] = 1;
const StringRef & string = string_column.getDataAt(i);
string_orc_column.data[i] = const_cast<char *>(string.data);
string_orc_column.length[i] = string.size;
@@ -223,6 +239,9 @@ void ORCBlockOutputFormat::writeDateTimes(
timestamp_orc_column.notNull[i] = 0;
continue;
}
else
timestamp_orc_column.notNull[i] = 1;
timestamp_orc_column.data[i] = get_seconds(timestamp_column.getElement(i));
timestamp_orc_column.nanoseconds[i] = get_nanoseconds(timestamp_column.getElement(i));
}
@@ -235,11 +254,10 @@ void ORCBlockOutputFormat::writeColumn(
DataTypePtr & type,
const PaddedPODArray<UInt8> * null_bytemap)
{
if (null_bytemap)
{
orc_column.hasNulls = true;
orc_column.notNull.resize(column.size());
}
if (null_bytemap)
orc_column.hasNulls = true;
switch (type->getTypeId())
{
case TypeIndex::Int8:
@@ -374,12 +392,25 @@ void ORCBlockOutputFormat::writeColumn(
for (size_t i = 0; i != list_column.size(); ++i)
{
list_orc_column.offsets[i + 1] = offsets[i];
list_orc_column.notNull[i] = 1;
}
orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements;
writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap);
list_orc_column.numElements = list_column.size();
break;
}
case TypeIndex::Tuple:
{
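/// Mark every struct slot as present, then write each Tuple element into its child batch.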
orc::StructVectorBatch & struct_orc_column = dynamic_cast<orc::StructVectorBatch &>(orc_column);
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
auto nested_types = typeid_cast<const DataTypeTuple *>(type.get())->getElements();
for (size_t i = 0; i != tuple_column.size(); ++i)
struct_orc_column.notNull[i] = 1;
for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap);
break;
}
default:
throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN);
}
@@ -409,6 +440,8 @@ size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk)
void ORCBlockOutputFormat::consume(Chunk chunk)
{
if (!writer)
prepareWriter();
size_t columns_num = chunk.getNumColumns();
size_t rows_num = chunk.getNumRows();
/// getMaxColumnSize is needed to write arrays.
@@ -425,9 +458,23 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
void ORCBlockOutputFormat::finalize()
{
if (!writer)
prepareWriter();
writer->close();
}
void ORCBlockOutputFormat::prepareWriter()
{
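/// Build the ORC schema from the header on first use; column names are passed to getORCType() so nested struct fields can be named.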
const Block & header = getPort(PortKind::Main).getHeader();
schema = orc::createStructType();
options.setCompression(orc::CompressionKind::CompressionKind_NONE);
size_t columns_count = header.columns();
for (size_t i = 0; i != columns_count; ++i)
schema->addStructField(header.safeGetByPosition(i).name, getORCType(data_types[i], header.safeGetByPosition(i).name));
writer = orc::createWriter(*schema, &output_stream, options);
}
void registerOutputFormatProcessorORC(FormatFactory & factory)
{
factory.registerOutputFormatProcessor("ORC", [](


@@ -43,7 +43,7 @@ public:
void finalize() override;
private:
ORC_UNIQUE_PTR<orc::Type> getORCType(const DataTypePtr & type);
ORC_UNIQUE_PTR<orc::Type> getORCType(const DataTypePtr & type, const std::string & column_name);
/// ConvertFunc is needed for type UInt8, because firstly UInt8 (char8_t) must be
/// converted to unsigned char (bugprone-signed-char-misuse in clang).
@@ -71,6 +71,8 @@ private:
size_t getColumnSize(const IColumn & column, DataTypePtr & type);
size_t getMaxColumnSize(Chunk & chunk);
void prepareWriter();
const FormatSettings format_settings;
ORCOutputStream output_stream;
DataTypes data_types;


@@ -67,6 +67,24 @@ void ParquetBlockInputFormat::resetParser()
row_group_current = 0;
}
static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
{
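/// Count how many Parquet column indices an Arrow type occupies: a list delegates to its value type, a struct sums over its fields, anything else takes one index.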
if (type->id() == arrow::Type::LIST)
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type());
int indices = 0;
if (type->id() == arrow::Type::STRUCT)
{
auto * struct_type = static_cast<arrow::StructType *>(type.get());
for (int i = 0; i != struct_type->num_fields(); ++i)
indices += countIndicesForType(struct_type->field(i)->type());
}
else
indices = 1;
return indices;
}
void ParquetBlockInputFormat::prepareReader()
{
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(in), arrow::default_memory_pool(), &file_reader));
@@ -76,11 +94,14 @@ void ParquetBlockInputFormat::prepareReader()
std::shared_ptr<arrow::Schema> schema;
THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
int index = 0;
for (int i = 0; i < schema->num_fields(); ++i)
{
if (getPort().getHeader().has(schema->field(i)->name()))
{
column_indices.push_back(i);
int indexes_count = countIndicesForType(schema->field(i)->type());
for (int j = 0; j != indexes_count; ++j)
column_indices.push_back(index++);
}
}
}


@@ -0,0 +1,2 @@
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]


@@ -0,0 +1,17 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_tuples"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_tuples (t1 Tuple(UInt32, UInt32), t2 Tuple(String, String), t3 Tuple(Tuple(UInt32, String), UInt32), t4 Tuple(Tuple(UInt32, UInt32), Tuple(String, String)), t5 Tuple(Array(UInt32), UInt32), t6 Tuple(Tuple(Array(UInt32), Array(UInt32)), Tuple(Array(Array(UInt32)), UInt32)), t7 Array(Tuple(Array(Array(UInt32)), Tuple(Array(Tuple(UInt32, UInt32)), UInt32)))) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO orc_tuples VALUES ((1, 2), ('1', '2'), ((1, '1'), 1), ((1, 2), ('1', '2')), ([1,2,3], 1), (([1,2,3], [1,2,3]), ([[1,2,3], [1,2,3]], 1)), [([[1,2,3], [1,2,3]], ([(1, 2), (1, 2)], 1))])"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_tuples FORMAT ORC" > "${CLICKHOUSE_TMP}"/tuples.orc
cat "${CLICKHOUSE_TMP}"/tuples.orc | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_tuples FORMAT ORC"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_tuples"
${CLICKHOUSE_CLIENT} --query="DROP TABLE orc_tuples"


@@ -0,0 +1,2 @@
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]


@@ -0,0 +1,17 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_tuples"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_tuples (t1 Tuple(UInt32, UInt32), t2 Tuple(String, String), t3 Tuple(Tuple(UInt32, String), UInt32), t4 Tuple(Tuple(UInt32, UInt32), Tuple(String, String)), t5 Tuple(Array(UInt32), UInt32), t6 Tuple(Tuple(Array(UInt32), Array(UInt32)), Tuple(Array(Array(UInt32)), UInt32)), t7 Array(Tuple(Array(Array(UInt32)), Tuple(Array(Tuple(UInt32, UInt32)), UInt32)))) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_tuples VALUES ((1, 2), ('1', '2'), ((1, '1'), 1), ((1, 2), ('1', '2')), ([1,2,3], 1), (([1,2,3], [1,2,3]), ([[1,2,3], [1,2,3]], 1)), [([[1,2,3], [1,2,3]], ([(1, 2), (1, 2)], 1))])"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_tuples FORMAT Parquet" > "${CLICKHOUSE_TMP}"/tuples.parquet
cat "${CLICKHOUSE_TMP}"/tuples.parquet | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_tuples FORMAT Parquet"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_tuples"
${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_tuples"


@@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_arrays"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_arrays"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_arrays (arr1 Array(Int8), arr2 Array(UInt8), arr3 Array(Int16), arr4 Array(UInt16), arr5 Array(Int32), arr6 Array(UInt32), arr7 Array(Int64), arr8 Array(UInt64), arr9 Array(String), arr10 Array(FixedString(4)), arr11 Array(Float32), arr12 Array(Float64), arr13 Array(Date), arr14 Array(Datetime)) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_arrays VALUES ([1,-2,3],[1,2,3],[100,-200,300],[100,200,300],[10000000,-20000000,30000000],[10000000,2000000,3000000],[100000000000000,-200000000000,3000000000000],[100000000000000,20000000000000,3000000000000],['Some string','Some string','Some string'],['0000','1111','2222'],[42.42,424.2,0.4242],[424242.424242,4242042420.242424,42],['2000-01-01','2001-01-01','2002-01-01'],['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00']),([],[],[],[],[],[],[],[],[],[],[],[],[],[])"


@@ -0,0 +1,2 @@
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]
(1,2) ('1','2') ((1,'1'),1) ((1,2),('1','2')) ([1,2,3],1) (([1,2,3],[1,2,3]),([[1,2,3],[1,2,3]],1)) [([[1,2,3],[1,2,3]],([(1,2),(1,2)],1))]


@@ -0,0 +1,17 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_tuples"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_tuples (t1 Tuple(UInt32, UInt32), t2 Tuple(String, String), t3 Tuple(Tuple(UInt32, String), UInt32), t4 Tuple(Tuple(UInt32, UInt32), Tuple(String, String)), t5 Tuple(Array(UInt32), UInt32), t6 Tuple(Tuple(Array(UInt32), Array(UInt32)), Tuple(Array(Array(UInt32)), UInt32)), t7 Array(Tuple(Array(Array(UInt32)), Tuple(Array(Tuple(UInt32, UInt32)), UInt32)))) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_tuples VALUES ((1, 2), ('1', '2'), ((1, '1'), 1), ((1, 2), ('1', '2')), ([1,2,3], 1), (([1,2,3], [1,2,3]), ([[1,2,3], [1,2,3]], 1)), [([[1,2,3], [1,2,3]], ([(1, 2), (1, 2)], 1))])"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_tuples FORMAT Arrow" > "${CLICKHOUSE_TMP}"/tuples.arrow
cat "${CLICKHOUSE_TMP}"/tuples.arrow | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_tuples FORMAT Arrow"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_tuples"
${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_tuples"