Correct read of Date and UInt16 as DateTime in Arrow format

This commit is contained in:
Alexey Milovidov 2021-07-24 17:36:21 +03:00
parent 68df1b4564
commit d6c1593b22
6 changed files with 84 additions and 88 deletions

View File

@ -10,6 +10,7 @@
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h> #include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeDate.h>
#include <common/DateLUTImpl.h> #include <common/DateLUTImpl.h>
#include <common/types.h> #include <common/types.h>
#include <Core/Block.h> #include <Core/Block.h>
@ -122,7 +123,7 @@ static void fillColumnWithStringData(std::shared_ptr<arrow::ChunkedArray> & arro
size_t chars_t_size = 0; size_t chars_t_size = 0;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::BinaryArray & chunk = assert_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
const size_t chunk_length = chunk.length(); const size_t chunk_length = chunk.length();
if (chunk_length > 0) if (chunk_length > 0)
@ -137,7 +138,7 @@ static void fillColumnWithStringData(std::shared_ptr<arrow::ChunkedArray> & arro
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::BinaryArray & chunk = assert_cast<arrow::BinaryArray &>(*(arrow_column->chunk(chunk_i)));
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data(); std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
const size_t chunk_length = chunk.length(); const size_t chunk_length = chunk.length();
@ -162,7 +163,7 @@ static void fillColumnWithBooleanData(std::shared_ptr<arrow::ChunkedArray> & arr
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i))); arrow::BooleanArray & chunk = assert_cast<arrow::BooleanArray &>(*(arrow_column->chunk(chunk_i)));
/// buffers[0] is a null bitmap and buffers[1] are actual values /// buffers[0] is a null bitmap and buffers[1] are actual values
std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1]; std::shared_ptr<arrow::Buffer> buffer = chunk.data()->buffers[1];
@ -179,20 +180,16 @@ static void fillColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray> & arro
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i))); arrow::Date32Array & chunk = assert_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
UInt32 days_num = static_cast<UInt32>(chunk.Value(value_i)); UInt32 days_num = static_cast<UInt32>(chunk.Value(value_i));
if (days_num > DATE_LUT_MAX_DAY_NUM) if (days_num > DATE_LUT_MAX_DAY_NUM)
{ throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
// TODO: will it rollback correctly? "Input value {} of a column '{}' is greater than max allowed Date value, which is {}",
throw Exception days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM);
{
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM
};
}
column_data.emplace_back(days_num); column_data.emplace_back(days_num);
} }
@ -206,18 +203,14 @@ static void fillDate32ColumnWithDate32Data(std::shared_ptr<arrow::ChunkedArray>
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i))); arrow::Date32Array & chunk = assert_cast<arrow::Date32Array &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
Int32 days_num = static_cast<Int32>(chunk.Value(value_i)); Int32 days_num = static_cast<Int32>(chunk.Value(value_i));
if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM)
{ throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
// TODO: will it rollback correctly? "Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM);
throw Exception{
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
"Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM};
}
column_data.emplace_back(days_num); column_data.emplace_back(days_num);
} }
@ -232,7 +225,7 @@ static void fillColumnWithDate64Data(std::shared_ptr<arrow::ChunkedArray> & arro
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
auto & chunk = static_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i))); auto & chunk = assert_cast<arrow::Date64Array &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i) for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{ {
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
@ -248,7 +241,7 @@ static void fillColumnWithTimestampData(std::shared_ptr<arrow::ChunkedArray> & a
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
auto & chunk = static_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i))); auto & chunk = assert_cast<arrow::TimestampArray &>(*(arrow_column->chunk(chunk_i)));
const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type()); const auto & type = static_cast<const ::arrow::TimestampType &>(*chunk.type());
UInt32 divide = 1; UInt32 divide = 1;
@ -316,9 +309,9 @@ static void fillOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedArray>
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i))); arrow::ListArray & list_chunk = assert_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
auto arrow_offsets_array = list_chunk.offsets(); auto arrow_offsets_array = list_chunk.offsets();
auto & arrow_offsets = static_cast<arrow::Int32Array &>(*arrow_offsets_array); auto & arrow_offsets = assert_cast<arrow::Int32Array &>(*arrow_offsets_array);
auto start = offsets_data.back(); auto start = offsets_data.back();
for (int64_t i = 1; i < arrow_offsets.length(); ++i) for (int64_t i = 1; i < arrow_offsets.length(); ++i)
offsets_data.emplace_back(start + arrow_offsets.Value(i)); offsets_data.emplace_back(start + arrow_offsets.Value(i));
@ -353,7 +346,8 @@ static void readColumnFromArrowColumn(
if (internal_column.isNullable()) if (internal_column.isNullable())
{ {
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(internal_column); ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(internal_column);
readColumnFromArrowColumn(arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values); readColumnFromArrowColumn(
arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values);
fillByteMapFromArrowColumn(arrow_column, column_nullable.getNullMapColumn()); fillByteMapFromArrowColumn(arrow_column, column_nullable.getNullMapColumn());
return; return;
} }
@ -408,7 +402,7 @@ static void readColumnFromArrowColumn(
array_vector.reserve(arrow_column->num_chunks()); array_vector.reserve(arrow_column->num_chunks());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::ListArray & list_chunk = static_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i))); arrow::ListArray & list_chunk = assert_cast<arrow::ListArray &>(*(arrow_column->chunk(chunk_i)));
std::shared_ptr<arrow::Array> chunk = list_chunk.values(); std::shared_ptr<arrow::Array> chunk = list_chunk.values();
array_vector.emplace_back(std::move(chunk)); array_vector.emplace_back(std::move(chunk));
} }
@ -418,7 +412,9 @@ static void readColumnFromArrowColumn(
? assert_cast<ColumnMap &>(internal_column).getNestedColumn() ? assert_cast<ColumnMap &>(internal_column).getNestedColumn()
: assert_cast<ColumnArray &>(internal_column); : assert_cast<ColumnArray &>(internal_column);
readColumnFromArrowColumn(arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values); readColumnFromArrowColumn(
arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values);
fillOffsetsFromArrowListColumn(arrow_column, column_array.getOffsetsColumn()); fillOffsetsFromArrowListColumn(arrow_column, column_array.getOffsetsColumn());
break; break;
} }
@ -429,7 +425,7 @@ static void readColumnFromArrowColumn(
std::vector<arrow::ArrayVector> nested_arrow_columns(fields_count); std::vector<arrow::ArrayVector> nested_arrow_columns(fields_count);
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::StructArray & struct_chunk = static_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i))); arrow::StructArray & struct_chunk = assert_cast<arrow::StructArray &>(*(arrow_column->chunk(chunk_i)));
for (int i = 0; i < fields_count; ++i) for (int i = 0; i < fields_count; ++i)
nested_arrow_columns[i].emplace_back(struct_chunk.field(i)); nested_arrow_columns[i].emplace_back(struct_chunk.field(i));
} }
@ -437,7 +433,8 @@ static void readColumnFromArrowColumn(
for (int i = 0; i != fields_count; ++i) for (int i = 0; i != fields_count; ++i)
{ {
auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]); auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
readColumnFromArrowColumn(nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values); readColumnFromArrowColumn(
nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values);
} }
break; break;
} }
@ -445,13 +442,14 @@ static void readColumnFromArrowColumn(
{ {
ColumnLowCardinality & column_lc = assert_cast<ColumnLowCardinality &>(internal_column); ColumnLowCardinality & column_lc = assert_cast<ColumnLowCardinality &>(internal_column);
auto & dict_values = dictionary_values[column_name]; auto & dict_values = dictionary_values[column_name];
/// Load dictionary values only once and reuse it. /// Load dictionary values only once and reuse it.
if (!dict_values) if (!dict_values)
{ {
arrow::ArrayVector dict_array; arrow::ArrayVector dict_array;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::DictionaryArray & dict_chunk = assert_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
dict_array.emplace_back(dict_chunk.dictionary()); dict_array.emplace_back(dict_chunk.dictionary());
} }
auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array); auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
@ -459,7 +457,8 @@ static void readColumnFromArrowColumn(
auto dict_column = IColumn::mutate(column_lc.getDictionaryPtr()); auto dict_column = IColumn::mutate(column_lc.getDictionaryPtr());
auto * uniq_column = static_cast<IColumnUnique *>(dict_column.get()); auto * uniq_column = static_cast<IColumnUnique *>(dict_column.get());
auto values_column = uniq_column->getNestedColumn()->cloneEmpty(); auto values_column = uniq_column->getNestedColumn()->cloneEmpty();
readColumnFromArrowColumn(arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values); readColumnFromArrowColumn(
arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values);
uniq_column->uniqueInsertRangeFrom(*values_column, 0, values_column->size()); uniq_column->uniqueInsertRangeFrom(*values_column, 0, values_column->size());
dict_values = std::move(dict_column); dict_values = std::move(dict_column);
} }
@ -467,7 +466,7 @@ static void readColumnFromArrowColumn(
arrow::ArrayVector indexes_array; arrow::ArrayVector indexes_array;
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{ {
arrow::DictionaryArray & dict_chunk = static_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i))); arrow::DictionaryArray & dict_chunk = assert_cast<arrow::DictionaryArray &>(*(arrow_column->chunk(chunk_i)));
indexes_array.emplace_back(dict_chunk.indices()); indexes_array.emplace_back(dict_chunk.indices());
} }
@ -489,11 +488,8 @@ static void readColumnFromArrowColumn(
// TODO: read JSON as a string? // TODO: read JSON as a string?
// TODO: read UUID as a string? // TODO: read UUID as a string?
default: default:
throw Exception throw Exception(ErrorCodes::UNKNOWN_TYPE,
{ "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name);
ErrorCodes::UNKNOWN_TYPE,
"Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name
};
} }
} }
@ -511,19 +507,19 @@ static DataTypePtr getInternalType(
if (arrow_type->id() == arrow::Type::DECIMAL128) if (arrow_type->id() == arrow::Type::DECIMAL128)
{ {
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get()); const auto * decimal_type = assert_cast<arrow::DecimalType *>(arrow_type.get());
return std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(), decimal_type->scale()); return std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(), decimal_type->scale());
} }
if (arrow_type->id() == arrow::Type::DECIMAL256) if (arrow_type->id() == arrow::Type::DECIMAL256)
{ {
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get()); const auto * decimal_type = assert_cast<arrow::DecimalType *>(arrow_type.get());
return std::make_shared<DataTypeDecimal<Decimal256>>(decimal_type->precision(), decimal_type->scale()); return std::make_shared<DataTypeDecimal<Decimal256>>(decimal_type->precision(), decimal_type->scale());
} }
if (arrow_type->id() == arrow::Type::LIST) if (arrow_type->id() == arrow::Type::LIST)
{ {
const auto * list_type = static_cast<arrow::ListType *>(arrow_type.get()); const auto * list_type = assert_cast<arrow::ListType *>(arrow_type.get());
auto list_nested_type = list_type->value_type(); auto list_nested_type = list_type->value_type();
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(column_type.get()); const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(column_type.get());
@ -536,7 +532,7 @@ static DataTypePtr getInternalType(
if (arrow_type->id() == arrow::Type::STRUCT) if (arrow_type->id() == arrow::Type::STRUCT)
{ {
const auto * struct_type = static_cast<arrow::StructType *>(arrow_type.get()); const auto * struct_type = assert_cast<arrow::StructType *>(arrow_type.get());
const DataTypeTuple * tuple_type = typeid_cast<const DataTypeTuple *>(column_type.get()); const DataTypeTuple * tuple_type = typeid_cast<const DataTypeTuple *>(column_type.get());
if (!tuple_type) if (!tuple_type)
throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE,
@ -546,14 +542,12 @@ static DataTypePtr getInternalType(
int internal_fields_num = tuple_nested_types.size(); int internal_fields_num = tuple_nested_types.size();
/// If internal column has fewer elements than arrow struct, we will select only first internal_fields_num columns. /// If internal column has fewer elements than arrow struct, we will select only first internal_fields_num columns.
if (internal_fields_num > struct_type->num_fields()) if (internal_fields_num > struct_type->num_fields())
throw Exception throw Exception(
{
ErrorCodes::CANNOT_CONVERT_TYPE, ErrorCodes::CANNOT_CONVERT_TYPE,
"Cannot convert arrow STRUCT with {} fields to a ClickHouse Tuple with {} elements: {}.", "Cannot convert arrow STRUCT with {} fields to a ClickHouse Tuple with {} elements: {}.",
struct_type->num_fields(), struct_type->num_fields(),
internal_fields_num, internal_fields_num,
column_type->getName() column_type->getName());
};
DataTypes nested_types; DataTypes nested_types;
for (int i = 0; i < internal_fields_num; ++i) for (int i = 0; i < internal_fields_num; ++i)
@ -564,7 +558,7 @@ static DataTypePtr getInternalType(
if (arrow_type->id() == arrow::Type::DICTIONARY) if (arrow_type->id() == arrow::Type::DICTIONARY)
{ {
const auto * arrow_dict_type = static_cast<arrow::DictionaryType *>(arrow_type.get()); const auto * arrow_dict_type = assert_cast<arrow::DictionaryType *>(arrow_type.get());
const auto * lc_type = typeid_cast<const DataTypeLowCardinality *>(column_type.get()); const auto * lc_type = typeid_cast<const DataTypeLowCardinality *>(column_type.get());
/// We allow to insert arrow dictionary into a non-LowCardinality column. /// We allow to insert arrow dictionary into a non-LowCardinality column.
const auto & dict_type = lc_type ? lc_type->getDictionaryType() : column_type; const auto & dict_type = lc_type ? lc_type->getDictionaryType() : column_type;
@ -583,6 +577,13 @@ static DataTypePtr getInternalType(
getInternalType(arrow_map_type->item_type(), map_type->getValueType(), column_name, format_name)); getInternalType(arrow_map_type->item_type(), map_type->getValueType(), column_name, format_name));
} }
if (arrow_type->id() == arrow::Type::UINT16
&& (isDate(column_type) || isDateTime(column_type) || isDate32(column_type) || isDateTime64(column_type)))
{
/// Read UInt16 as Date. It will allow correct conversion to DateTime further.
return std::make_shared<DataTypeDate>();
}
auto filter = [=](auto && elem) auto filter = [=](auto && elem)
{ {
auto which = WhichDataType(column_type); auto which = WhichDataType(column_type);

View File

@ -70,11 +70,8 @@ namespace DB
{"Float32", arrow::float32()}, {"Float32", arrow::float32()},
{"Float64", arrow::float64()}, {"Float64", arrow::float64()},
//{"Date", arrow::date64()}, {"Date", arrow::uint16()}, /// uint16 is used instead of date32, because Apache Arrow cannot correctly serialize Date32Array.
//{"Date", arrow::date32()}, {"DateTime", arrow::uint32()}, /// uint32 is used instead of date64, because we don't need milliseconds.
{"Date", arrow::uint16()}, // CHECK
//{"DateTime", arrow::date64()}, // BUG! saves as date32
{"DateTime", arrow::uint32()},
{"String", arrow::binary()}, {"String", arrow::binary()},
{"FixedString", arrow::binary()}, {"FixedString", arrow::binary()},
@ -289,11 +286,11 @@ namespace DB
auto value_type = assert_cast<arrow::DictionaryType *>(array_builder->type().get())->value_type(); auto value_type = assert_cast<arrow::DictionaryType *>(array_builder->type().get())->value_type();
#define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \ #define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \
if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \ if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \
{ \ { \
fillArrowArrayWithLowCardinalityColumnDataImpl<ARROW_TYPE>(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \ fillArrowArrayWithLowCardinalityColumnDataImpl<ARROW_TYPE>(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \
return; \ return; \
} }
FOR_ARROW_TYPES(DISPATCH) FOR_ARROW_TYPES(DISPATCH)
#undef DISPATCH #undef DISPATCH
@ -361,7 +358,6 @@ namespace DB
size_t end) size_t end)
{ {
const auto & internal_data = assert_cast<const ColumnVector<UInt32> &>(*write_column).getData(); const auto & internal_data = assert_cast<const ColumnVector<UInt32> &>(*write_column).getData();
//arrow::Date64Builder builder;
arrow::UInt32Builder & builder = assert_cast<arrow::UInt32Builder &>(*array_builder); arrow::UInt32Builder & builder = assert_cast<arrow::UInt32Builder &>(*array_builder);
arrow::Status status; arrow::Status status;
@ -370,8 +366,6 @@ namespace DB
if (null_bytemap && (*null_bytemap)[value_i]) if (null_bytemap && (*null_bytemap)[value_i])
status = builder.AppendNull(); status = builder.AppendNull();
else else
/// Implicitly converts UInt16 to Int32
//status = date_builder.Append(static_cast<int64_t>(internal_data[value_i]) * 1000); // now ms. TODO check other units
status = builder.Append(internal_data[value_i]); status = builder.Append(internal_data[value_i]);
checkStatus(status, write_column->getName(), format_name); checkStatus(status, write_column->getName(), format_name);
@ -526,14 +520,15 @@ namespace DB
} }
} }
static std::shared_ptr<arrow::DataType> getArrowType(DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * is_column_nullable) static std::shared_ptr<arrow::DataType> getArrowType(
DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable)
{ {
if (column_type->isNullable()) if (column_type->isNullable())
{ {
DataTypePtr nested_type = assert_cast<const DataTypeNullable *>(column_type.get())->getNestedType(); DataTypePtr nested_type = assert_cast<const DataTypeNullable *>(column_type.get())->getNestedType();
ColumnPtr nested_column = assert_cast<const ColumnNullable *>(column.get())->getNestedColumnPtr(); ColumnPtr nested_column = assert_cast<const ColumnNullable *>(column.get())->getNestedColumnPtr();
auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable); auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable);
*is_column_nullable = true; *out_is_column_nullable = true;
return arrow_type; return arrow_type;
} }
@ -566,7 +561,7 @@ namespace DB
{ {
auto nested_type = assert_cast<const DataTypeArray *>(column_type.get())->getNestedType(); auto nested_type = assert_cast<const DataTypeArray *>(column_type.get())->getNestedType();
auto nested_column = assert_cast<const ColumnArray *>(column.get())->getDataPtr(); auto nested_column = assert_cast<const ColumnArray *>(column.get())->getDataPtr();
auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable); auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable);
return arrow::list(nested_arrow_type); return arrow::list(nested_arrow_type);
} }
@ -578,8 +573,8 @@ namespace DB
for (size_t i = 0; i != nested_types.size(); ++i) for (size_t i = 0; i != nested_types.size(); ++i)
{ {
String name = column_name + "." + std::to_string(i); String name = column_name + "." + std::to_string(i);
auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, is_column_nullable); auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable);
nested_fields.push_back(std::make_shared<arrow::Field>(name, nested_arrow_type, *is_column_nullable)); nested_fields.push_back(std::make_shared<arrow::Field>(name, nested_arrow_type, *out_is_column_nullable));
} }
return arrow::struct_(std::move(nested_fields)); return arrow::struct_(std::move(nested_fields));
} }
@ -592,7 +587,7 @@ namespace DB
const auto & indexes_column = lc_column->getIndexesPtr(); const auto & indexes_column = lc_column->getIndexesPtr();
return arrow::dictionary( return arrow::dictionary(
getArrowTypeForLowCardinalityIndexes(indexes_column), getArrowTypeForLowCardinalityIndexes(indexes_column),
getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable)); getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable));
} }
if (isMap(column_type)) if (isMap(column_type))
@ -603,9 +598,8 @@ namespace DB
const auto & columns = assert_cast<const ColumnMap *>(column.get())->getNestedData().getColumns(); const auto & columns = assert_cast<const ColumnMap *>(column.get())->getNestedData().getColumns();
return arrow::map( return arrow::map(
getArrowType(key_type, columns[0], column_name, format_name, is_column_nullable), getArrowType(key_type, columns[0], column_name, format_name, out_is_column_nullable),
getArrowType(val_type, columns[1], column_name, format_name, is_column_nullable) getArrowType(val_type, columns[1], column_name, format_name, out_is_column_nullable));
);
} }
const std::string type_name = column_type->getFamilyName(); const std::string type_name = column_type->getFamilyName();
@ -618,8 +612,9 @@ namespace DB
return arrow_type_it->second; return arrow_type_it->second;
} }
throw Exception{fmt::format("The type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getName(), column_name, format_name), throw Exception(ErrorCodes::UNKNOWN_TYPE,
ErrorCodes::UNKNOWN_TYPE}; "The type '{}' of a column '{}' is not supported for conversion into {} data format.",
column_type->getName(), column_name, format_name);
} }
CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_) CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_)

View File

@ -41,7 +41,7 @@ converted:
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
diff: diff:
dest: dest:
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00
80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12
min: min:
-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 -128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03
@ -49,10 +49,10 @@ min:
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06
127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03
max: max:
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06 -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06
80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12 80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 2004-02-03 04:05:06
dest from null: dest from null:
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06

View File

@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
# convert min type # convert min type
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory"
# convert max type # convert max type
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')"
@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6"
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow"

View File

@ -41,7 +41,7 @@ converted:
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
diff: diff:
dest: dest:
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00
80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12
min: min:
-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 -128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03
@ -49,10 +49,10 @@ min:
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06
127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03
max: max:
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06 -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06
80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12 80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 2004-02-03 04:05:06
dest from null: dest from null:
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06

View File

@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
# convert min type # convert min type
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory"
# convert max type # convert max type
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')"
@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6"
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream"