mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Correct read of Date and UInt16 as DateTime in Arrow format
This commit is contained in:
parent
68df1b4564
commit
d6c1593b22
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <common/DateLUTImpl.h>
|
||||
#include <common/types.h>
|
||||
#include <Core/Block.h>
|
||||
@ -122,7 +123,7 @@ static void fillColumnWithStringData(std::shared_ptr<arrow::ChunkedArray> & arro
|
||||
size_t chars_t_size = 0;
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::BinaryArray & chunk = assert_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
if (chunk_length > 0)
|
||||
@ -137,7 +138,7 @@ static void fillColumnWithStringData(std::shared_ptr<arrow::ChunkedArray> & arro
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::BinaryArray & chunk = assert_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
@ -162,7 +163,7 @@ static void fillColumnWithBooleanData(std::shared_ptr<arrow::ChunkedArray> & arr
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::BooleanArray & chunk = assert_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
/// buffers[0] is a null bitmap and buffers[1] are actual values
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1];
|
||||
|
||||
@ -179,20 +180,16 @@ static void fillColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arro
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::Date32Array & chunk = assert_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
|
||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||
{
|
||||
UInt32 days_num = static_cast<UInt32>(chunk.Value(value_i));
|
||||
|
||||
if (days_num > DATE_LUT_MAX_DAY_NUM)
|
||||
{
|
||||
// TODO: will it rollback correctly?
|
||||
throw Exception
|
||||
{
|
||||
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
|
||||
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM
|
||||
};
|
||||
}
|
||||
throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
|
||||
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}",
|
||||
days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM);
|
||||
|
||||
column_data.emplace_back(days_num);
|
||||
}
|
||||
@ -206,18 +203,14 @@ static void fillDate32ColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray>
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::Date32Array & chunk = assert_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
|
||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||
{
|
||||
Int32 days_num = static_cast<Int32>(chunk.Value(value_i));
|
||||
if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM)
|
||||
{
|
||||
// TODO: will it rollback correctly?
|
||||
throw Exception{
|
||||
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
|
||||
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM};
|
||||
}
|
||||
throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
|
||||
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM);
|
||||
|
||||
column_data.emplace_back(days_num);
|
||||
}
|
||||
@ -232,7 +225,7 @@ static void fillColumnWithDate64Data(std::shared_ptr<arrow::ChunkedArray> & arro
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
auto & chunk = static_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
auto & chunk = assert_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
|
||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||
{
|
||||
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
|
||||
@ -248,7 +241,7 @@ static void fillColumnWithTimestampData(std::shared_ptr<arrow::ChunkedArray> & a
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
auto & chunk = static_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
auto & chunk = assert_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type());
|
||||
|
||||
UInt32 divide = 1;
|
||||
@ -316,9 +309,9 @@ static void fillOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedArray>
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::ListArray & list_chunk = assert_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
auto arrow_offsets_array = list_chunk.offsets();
|
||||
auto & arrow_offsets = static_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
||||
auto & arrow_offsets = assert_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
||||
auto start = offsets_data.back();
|
||||
for (int64_t i = 1; i < arrow_offsets.length(); ++i)
|
||||
offsets_data.emplace_back(start + arrow_offsets.Value(i));
|
||||
@ -353,7 +346,8 @@ static void readColumnFromArrowColumn(
|
||||
if (internal_column.isNullable())
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(internal_column);
|
||||
readColumnFromArrowColumn(arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values);
|
||||
readColumnFromArrowColumn(
|
||||
arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values);
|
||||
fillByteMapFromArrowColumn(arrow_column, column_nullable.getNullMapColumn());
|
||||
return;
|
||||
}
|
||||
@ -408,7 +402,7 @@ static void readColumnFromArrowColumn(
|
||||
array_vector.reserve(arrow_column->num_chunks());
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::ListArray & list_chunk = assert_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
std::shared_ptr<arrow::Array> chunk = list_chunk.values();
|
||||
array_vector.emplace_back(std::move(chunk));
|
||||
}
|
||||
@ -418,7 +412,9 @@ static void readColumnFromArrowColumn(
|
||||
? assert_cast<ColumnMap &>(internal_column).getNestedColumn()
|
||||
: assert_cast<ColumnArray &>(internal_column);
|
||||
|
||||
readColumnFromArrowColumn(arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values);
|
||||
readColumnFromArrowColumn(
|
||||
arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values);
|
||||
|
||||
fillOffsetsFromArrowListColumn(arrow_column, column_array.getOffsetsColumn());
|
||||
break;
|
||||
}
|
||||
@ -429,7 +425,7 @@ static void readColumnFromArrowColumn(
|
||||
std::vector<arrow::ArrayVector> nested_arrow_columns(fields_count);
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::StructArray & struct_chunk = static_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::StructArray & struct_chunk = assert_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
for (int i = 0; i < fields_count; ++i)
|
||||
nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
|
||||
}
|
||||
@ -437,7 +433,8 @@ static void readColumnFromArrowColumn(
|
||||
for (int i = 0; i != fields_count; ++i)
|
||||
{
|
||||
auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
|
||||
readColumnFromArrowColumn(nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values);
|
||||
readColumnFromArrowColumn(
|
||||
nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -445,13 +442,14 @@ static void readColumnFromArrowColumn(
|
||||
{
|
||||
ColumnLowCardinality & column_lc = assert_cast<ColumnLowCardinality &>(internal_column);
|
||||
auto & dict_values = dictionary_values[column_name];
|
||||
|
||||
/// Load dictionary values only once and reuse it.
|
||||
if (!dict_values)
|
||||
{
|
||||
arrow::ArrayVector dict_array;
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::DictionaryArray & dict_chunk = assert_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
dict_array.emplace_back(dict_chunk.dictionary());
|
||||
}
|
||||
auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
|
||||
@ -459,7 +457,8 @@ static void readColumnFromArrowColumn(
|
||||
auto dict_column = IColumn::mutate(column_lc.getDictionaryPtr());
|
||||
auto * uniq_column = static_cast<IColumnUnique *>(dict_column.get());
|
||||
auto values_column = uniq_column->getNestedColumn()->cloneEmpty();
|
||||
readColumnFromArrowColumn(arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values);
|
||||
readColumnFromArrowColumn(
|
||||
arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values);
|
||||
uniq_column->uniqueInsertRangeFrom(*values_column, 0, values_column->size());
|
||||
dict_values = std::move(dict_column);
|
||||
}
|
||||
@ -467,7 +466,7 @@ static void readColumnFromArrowColumn(
|
||||
arrow::ArrayVector indexes_array;
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
arrow::DictionaryArray & dict_chunk = assert_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
|
||||
indexes_array.emplace_back(dict_chunk.indices());
|
||||
}
|
||||
|
||||
@ -489,11 +488,8 @@ static void readColumnFromArrowColumn(
|
||||
// TODO: read JSON as a string?
|
||||
// TODO: read UUID as a string?
|
||||
default:
|
||||
throw Exception
|
||||
{
|
||||
ErrorCodes::UNKNOWN_TYPE,
|
||||
"Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name
|
||||
};
|
||||
throw Exception(ErrorCodes::UNKNOWN_TYPE,
|
||||
"Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -511,19 +507,19 @@ static DataTypePtr getInternalType(
|
||||
|
||||
if (arrow_type->id() == arrow::Type::DECIMAL128)
|
||||
{
|
||||
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get());
|
||||
const auto * decimal_type = assert_cast<arrow::DecimalType *>(arrow_type.get());
|
||||
return std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(), decimal_type->scale());
|
||||
}
|
||||
|
||||
if (arrow_type->id() == arrow::Type::DECIMAL256)
|
||||
{
|
||||
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get());
|
||||
const auto * decimal_type = assert_cast<arrow::DecimalType *>(arrow_type.get());
|
||||
return std::make_shared<DataTypeDecimal<Decimal256>>(decimal_type->precision(), decimal_type->scale());
|
||||
}
|
||||
|
||||
if (arrow_type->id() == arrow::Type::LIST)
|
||||
{
|
||||
const auto * list_type = static_cast<arrow::ListType *>(arrow_type.get());
|
||||
const auto * list_type = assert_cast<arrow::ListType *>(arrow_type.get());
|
||||
auto list_nested_type = list_type->value_type();
|
||||
|
||||
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(column_type.get());
|
||||
@ -536,7 +532,7 @@ static DataTypePtr getInternalType(
|
||||
|
||||
if (arrow_type->id() == arrow::Type::STRUCT)
|
||||
{
|
||||
const auto * struct_type = static_cast<arrow::StructType *>(arrow_type.get());
|
||||
const auto * struct_type = assert_cast<arrow::StructType *>(arrow_type.get());
|
||||
const DataTypeTuple * tuple_type = typeid_cast<const DataTypeTuple *>(column_type.get());
|
||||
if (!tuple_type)
|
||||
throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE,
|
||||
@ -546,14 +542,12 @@ static DataTypePtr getInternalType(
|
||||
int internal_fields_num = tuple_nested_types.size();
|
||||
/// If the internal column has fewer elements than the arrow struct, we will select only the first internal_fields_num columns.
|
||||
if (internal_fields_num > struct_type->num_fields())
|
||||
throw Exception
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_CONVERT_TYPE,
|
||||
"Cannot convert arrow STRUCT with {} fields to a ClickHouse Tuple with {} elements: {}.",
|
||||
struct_type->num_fields(),
|
||||
internal_fields_num,
|
||||
column_type->getName()
|
||||
};
|
||||
column_type->getName());
|
||||
|
||||
DataTypes nested_types;
|
||||
for (int i = 0; i < internal_fields_num; ++i)
|
||||
@ -564,7 +558,7 @@ static DataTypePtr getInternalType(
|
||||
|
||||
if (arrow_type->id() == arrow::Type::DICTIONARY)
|
||||
{
|
||||
const auto * arrow_dict_type = static_cast<arrow::DictionaryType *>(arrow_type.get());
|
||||
const auto * arrow_dict_type = assert_cast<arrow::DictionaryType *>(arrow_type.get());
|
||||
const auto * lc_type = typeid_cast<const DataTypeLowCardinality *>(column_type.get());
|
||||
/// We allow to insert arrow dictionary into a non-LowCardinality column.
|
||||
const auto & dict_type = lc_type ? lc_type->getDictionaryType() : column_type;
|
||||
@ -583,6 +577,13 @@ static DataTypePtr getInternalType(
|
||||
getInternalType(arrow_map_type->item_type(), map_type->getValueType(), column_name, format_name));
|
||||
}
|
||||
|
||||
if (arrow_type->id() == arrow::Type::UINT16
|
||||
&& (isDate(column_type) || isDateTime(column_type) || isDate32(column_type) || isDateTime64(column_type)))
|
||||
{
|
||||
/// Read UInt16 as Date. It will allow correct conversion to DateTime further.
|
||||
return std::make_shared<DataTypeDate>();
|
||||
}
|
||||
|
||||
auto filter = [=](auto && elem)
|
||||
{
|
||||
auto which = WhichDataType(column_type);
|
||||
|
@ -70,11 +70,8 @@ namespace DB
|
||||
{"Float32", arrow::float32()},
|
||||
{"Float64", arrow::float64()},
|
||||
|
||||
//{"Date", arrow::date64()},
|
||||
//{"Date", arrow::date32()},
|
||||
{"Date", arrow::uint16()}, // CHECK
|
||||
//{"DateTime", arrow::date64()}, // BUG! saves as date32
|
||||
{"DateTime", arrow::uint32()},
|
||||
{"Date", arrow::uint16()}, /// uint16 is used instead of date32, because Apache Arrow cannot correctly serialize Date32Array.
|
||||
{"DateTime", arrow::uint32()}, /// uint32 is used instead of date64, because we don't need milliseconds.
|
||||
|
||||
{"String", arrow::binary()},
|
||||
{"FixedString", arrow::binary()},
|
||||
@ -289,11 +286,11 @@ namespace DB
|
||||
auto value_type = assert_cast<arrow::DictionaryType *>(array_builder->type().get())->value_type();
|
||||
|
||||
#define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \
|
||||
if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \
|
||||
{ \
|
||||
fillArrowArrayWithLowCardinalityColumnDataImpl<ARROW_TYPE>(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \
|
||||
return; \
|
||||
}
|
||||
if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \
|
||||
{ \
|
||||
fillArrowArrayWithLowCardinalityColumnDataImpl<ARROW_TYPE>(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \
|
||||
return; \
|
||||
}
|
||||
|
||||
FOR_ARROW_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
@ -361,7 +358,6 @@ namespace DB
|
||||
size_t end)
|
||||
{
|
||||
const auto & internal_data = assert_cast<const ColumnVector<UInt32> &>(*write_column).getData();
|
||||
//arrow::Date64Builder builder;
|
||||
arrow::UInt32Builder & builder = assert_cast<arrow::UInt32Builder &>(*array_builder);
|
||||
arrow::Status status;
|
||||
|
||||
@ -370,8 +366,6 @@ namespace DB
|
||||
if (null_bytemap && (*null_bytemap)[value_i])
|
||||
status = builder.AppendNull();
|
||||
else
|
||||
/// Implicitly converts UInt16 to Int32
|
||||
//status = date_builder.Append(static_cast<int64_t>(internal_data[value_i]) * 1000); // now ms. TODO check other units
|
||||
status = builder.Append(internal_data[value_i]);
|
||||
|
||||
checkStatus(status, write_column->getName(), format_name);
|
||||
@ -526,14 +520,15 @@ namespace DB
|
||||
}
|
||||
}
|
||||
|
||||
static std::shared_ptr<arrow::DataType> getArrowType(DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * is_column_nullable)
|
||||
static std::shared_ptr<arrow::DataType> getArrowType(
|
||||
DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable)
|
||||
{
|
||||
if (column_type->isNullable())
|
||||
{
|
||||
DataTypePtr nested_type = assert_cast<const DataTypeNullable *>(column_type.get())->getNestedType();
|
||||
ColumnPtr nested_column = assert_cast<const ColumnNullable *>(column.get())->getNestedColumnPtr();
|
||||
auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable);
|
||||
*is_column_nullable = true;
|
||||
auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable);
|
||||
*out_is_column_nullable = true;
|
||||
return arrow_type;
|
||||
}
|
||||
|
||||
@ -566,7 +561,7 @@ namespace DB
|
||||
{
|
||||
auto nested_type = assert_cast<const DataTypeArray *>(column_type.get())->getNestedType();
|
||||
auto nested_column = assert_cast<const ColumnArray *>(column.get())->getDataPtr();
|
||||
auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable);
|
||||
auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable);
|
||||
return arrow::list(nested_arrow_type);
|
||||
}
|
||||
|
||||
@ -578,8 +573,8 @@ namespace DB
|
||||
for (size_t i = 0; i != nested_types.size(); ++i)
|
||||
{
|
||||
String name = column_name + "." + std::to_string(i);
|
||||
auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, is_column_nullable);
|
||||
nested_fields.push_back(std::make_shared<arrow::Field>(name, nested_arrow_type, *is_column_nullable));
|
||||
auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable);
|
||||
nested_fields.push_back(std::make_shared<arrow::Field>(name, nested_arrow_type, *out_is_column_nullable));
|
||||
}
|
||||
return arrow::struct_(std::move(nested_fields));
|
||||
}
|
||||
@ -592,7 +587,7 @@ namespace DB
|
||||
const auto & indexes_column = lc_column->getIndexesPtr();
|
||||
return arrow::dictionary(
|
||||
getArrowTypeForLowCardinalityIndexes(indexes_column),
|
||||
getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable));
|
||||
getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable));
|
||||
}
|
||||
|
||||
if (isMap(column_type))
|
||||
@ -603,9 +598,8 @@ namespace DB
|
||||
|
||||
const auto & columns = assert_cast<const ColumnMap *>(column.get())->getNestedData().getColumns();
|
||||
return arrow::map(
|
||||
getArrowType(key_type, columns[0], column_name, format_name, is_column_nullable),
|
||||
getArrowType(val_type, columns[1], column_name, format_name, is_column_nullable)
|
||||
);
|
||||
getArrowType(key_type, columns[0], column_name, format_name, out_is_column_nullable),
|
||||
getArrowType(val_type, columns[1], column_name, format_name, out_is_column_nullable));
|
||||
}
|
||||
|
||||
const std::string type_name = column_type->getFamilyName();
|
||||
@ -618,8 +612,9 @@ namespace DB
|
||||
return arrow_type_it->second;
|
||||
}
|
||||
|
||||
throw Exception{fmt::format("The type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getName(), column_name, format_name),
|
||||
ErrorCodes::UNKNOWN_TYPE};
|
||||
throw Exception(ErrorCodes::UNKNOWN_TYPE,
|
||||
"The type '{}' of a column '{}' is not supported for conversion into {} data format.",
|
||||
column_type->getName(), column_name, format_name);
|
||||
}
|
||||
|
||||
CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_)
|
||||
|
@ -41,7 +41,7 @@ converted:
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
|
||||
diff:
|
||||
dest:
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12
|
||||
min:
|
||||
-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03
|
||||
@ -49,10 +49,10 @@ min:
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06
|
||||
127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03
|
||||
max:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 2004-02-03 04:05:06
|
||||
dest from null:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06
|
||||
|
@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
# convert min type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory"
|
||||
# convert max type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')"
|
||||
|
||||
@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6"
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow"
|
||||
|
@ -41,7 +41,7 @@ converted:
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
|
||||
diff:
|
||||
dest:
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12
|
||||
min:
|
||||
-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03
|
||||
@ -49,10 +49,10 @@ min:
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06
|
||||
127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03
|
||||
max:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 2004-02-03 04:05:06
|
||||
dest from null:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06
|
||||
|
@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
# convert min type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory"
|
||||
# convert max type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')"
|
||||
|
||||
@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6"
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream"
|
||||
|
Loading…
Reference in New Issue
Block a user