From d6c1593b2218f865e1108ded93a69cd0baed4096 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jul 2021 17:36:21 +0300 Subject: [PATCH] Correct read of Date and UInt16 as DateTime in Arrow format --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 91 ++++++++++--------- .../Formats/Impl/CHColumnToArrowColumn.cpp | 45 ++++----- .../queries/0_stateless/01273_arrow.reference | 8 +- tests/queries/0_stateless/01273_arrow.sh | 10 +- .../0_stateless/01273_arrow_stream.reference | 8 +- .../queries/0_stateless/01273_arrow_stream.sh | 10 +- 6 files changed, 84 insertions(+), 88 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index cf62812b755..68114a2170c 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,7 @@ static void fillColumnWithStringData(std::shared_ptr & arro size_t chars_t_size = 0; for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::BinaryArray & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::BinaryArray & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); const size_t chunk_length = chunk.length(); if (chunk_length > 0) @@ -137,7 +138,7 @@ static void fillColumnWithStringData(std::shared_ptr & arro for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::BinaryArray & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::BinaryArray & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr buffer = chunk.value_data(); const size_t chunk_length = chunk.length(); @@ -162,7 +163,7 @@ static void fillColumnWithBooleanData(std::shared_ptr & arr for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); 
chunk_i < num_chunks; ++chunk_i) { - arrow::BooleanArray & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::BooleanArray & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = chunk.data()->buffers[1]; @@ -179,20 +180,16 @@ static void fillColumnWithDate32Data(std::shared_ptr & arro for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::Date32Array & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::Date32Array & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { UInt32 days_num = static_cast(chunk.Value(value_i)); + if (days_num > DATE_LUT_MAX_DAY_NUM) - { - // TODO: will it rollback correctly? - throw Exception - { - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, - "Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM - }; - } + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column '{}' is greater than max allowed Date value, which is {}", + days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM); column_data.emplace_back(days_num); } @@ -206,18 +203,14 @@ static void fillDate32ColumnWithDate32Data(std::shared_ptr for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::Date32Array & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::Date32Array & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { Int32 days_num = static_cast(chunk.Value(value_i)); if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) - { - // TODO: will it rollback correctly? 
- throw Exception{ - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, - "Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM}; - } + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column '{}' is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM); column_data.emplace_back(days_num); } @@ -232,7 +225,7 @@ static void fillColumnWithDate64Data(std::shared_ptr & arro for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - auto & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + auto & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { auto timestamp = static_cast(chunk.Value(value_i) / 1000); // Always? in ms @@ -248,7 +241,7 @@ static void fillColumnWithTimestampData(std::shared_ptr & a for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - auto & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + auto & chunk = assert_cast(*(arrow_column->chunk(chunk_i))); const auto & type = static_cast(*chunk.type()); UInt32 divide = 1; @@ -316,9 +309,9 @@ static void fillOffsetsFromArrowListColumn(std::shared_ptr for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::ListArray & list_chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::ListArray & list_chunk = assert_cast(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); - auto & arrow_offsets = static_cast(*arrow_offsets_array); + auto & arrow_offsets = assert_cast(*arrow_offsets_array); auto start = offsets_data.back(); for (int64_t i = 1; i < arrow_offsets.length(); ++i) offsets_data.emplace_back(start + 
arrow_offsets.Value(i)); @@ -353,7 +346,8 @@ static void readColumnFromArrowColumn( if (internal_column.isNullable()) { ColumnNullable & column_nullable = assert_cast(internal_column); - readColumnFromArrowColumn(arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values); + readColumnFromArrowColumn( + arrow_column, column_nullable.getNestedColumn(), column_name, format_name, true, dictionary_values); fillByteMapFromArrowColumn(arrow_column, column_nullable.getNullMapColumn()); return; } @@ -408,7 +402,7 @@ static void readColumnFromArrowColumn( array_vector.reserve(arrow_column->num_chunks()); for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::ListArray & list_chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::ListArray & list_chunk = assert_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr chunk = list_chunk.values(); array_vector.emplace_back(std::move(chunk)); } @@ -418,7 +412,9 @@ static void readColumnFromArrowColumn( ? 
assert_cast(internal_column).getNestedColumn() : assert_cast(internal_column); - readColumnFromArrowColumn(arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values); + readColumnFromArrowColumn( + arrow_nested_column, column_array.getData(), column_name, format_name, false, dictionary_values); + fillOffsetsFromArrowListColumn(arrow_column, column_array.getOffsetsColumn()); break; } @@ -429,7 +425,7 @@ static void readColumnFromArrowColumn( std::vector nested_arrow_columns(fields_count); for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::StructArray & struct_chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::StructArray & struct_chunk = assert_cast(*(arrow_column->chunk(chunk_i))); for (int i = 0; i < fields_count; ++i) nested_arrow_columns[i].emplace_back(struct_chunk.field(i)); } @@ -437,7 +433,8 @@ static void readColumnFromArrowColumn( for (int i = 0; i != fields_count; ++i) { auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - readColumnFromArrowColumn(nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values); + readColumnFromArrowColumn( + nested_arrow_column, column_tuple.getColumn(i), column_name, format_name, false, dictionary_values); } break; } @@ -445,13 +442,14 @@ static void readColumnFromArrowColumn( { ColumnLowCardinality & column_lc = assert_cast(internal_column); auto & dict_values = dictionary_values[column_name]; + /// Load dictionary values only once and reuse it. 
if (!dict_values) { arrow::ArrayVector dict_array; for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::DictionaryArray & dict_chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::DictionaryArray & dict_chunk = assert_cast(*(arrow_column->chunk(chunk_i))); dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); @@ -459,7 +457,8 @@ static void readColumnFromArrowColumn( auto dict_column = IColumn::mutate(column_lc.getDictionaryPtr()); auto * uniq_column = static_cast(dict_column.get()); auto values_column = uniq_column->getNestedColumn()->cloneEmpty(); - readColumnFromArrowColumn(arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values); + readColumnFromArrowColumn( + arrow_dict_column, *values_column, column_name, format_name, false, dictionary_values); uniq_column->uniqueInsertRangeFrom(*values_column, 0, values_column->size()); dict_values = std::move(dict_column); } @@ -467,7 +466,7 @@ static void readColumnFromArrowColumn( arrow::ArrayVector indexes_array; for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - arrow::DictionaryArray & dict_chunk = static_cast(*(arrow_column->chunk(chunk_i))); + arrow::DictionaryArray & dict_chunk = assert_cast(*(arrow_column->chunk(chunk_i))); indexes_array.emplace_back(dict_chunk.indices()); } @@ -489,11 +488,8 @@ static void readColumnFromArrowColumn( // TODO: read JSON as a string? // TODO: read UUID as a string? 
default: - throw Exception - { - ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name - }; + throw Exception(ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name); } } @@ -511,19 +507,19 @@ static DataTypePtr getInternalType( if (arrow_type->id() == arrow::Type::DECIMAL128) { - const auto * decimal_type = static_cast(arrow_type.get()); + const auto * decimal_type = assert_cast(arrow_type.get()); return std::make_shared>(decimal_type->precision(), decimal_type->scale()); } if (arrow_type->id() == arrow::Type::DECIMAL256) { - const auto * decimal_type = static_cast(arrow_type.get()); + const auto * decimal_type = assert_cast(arrow_type.get()); return std::make_shared>(decimal_type->precision(), decimal_type->scale()); } if (arrow_type->id() == arrow::Type::LIST) { - const auto * list_type = static_cast(arrow_type.get()); + const auto * list_type = assert_cast(arrow_type.get()); auto list_nested_type = list_type->value_type(); const DataTypeArray * array_type = typeid_cast(column_type.get()); @@ -536,7 +532,7 @@ static DataTypePtr getInternalType( if (arrow_type->id() == arrow::Type::STRUCT) { - const auto * struct_type = static_cast(arrow_type.get()); + const auto * struct_type = assert_cast(arrow_type.get()); const DataTypeTuple * tuple_type = typeid_cast(column_type.get()); if (!tuple_type) throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, @@ -546,14 +542,12 @@ static DataTypePtr getInternalType( int internal_fields_num = tuple_nested_types.size(); /// If internal column has less elements then arrow struct, we will select only first internal_fields_num columns. 
if (internal_fields_num > struct_type->num_fields()) - throw Exception - { + throw Exception( ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert arrow STRUCT with {} fields to a ClickHouse Tuple with {} elements: {}.", struct_type->num_fields(), internal_fields_num, - column_type->getName() - }; + column_type->getName()); DataTypes nested_types; for (int i = 0; i < internal_fields_num; ++i) @@ -564,7 +558,7 @@ static DataTypePtr getInternalType( if (arrow_type->id() == arrow::Type::DICTIONARY) { - const auto * arrow_dict_type = static_cast(arrow_type.get()); + const auto * arrow_dict_type = assert_cast(arrow_type.get()); const auto * lc_type = typeid_cast(column_type.get()); /// We allow to insert arrow dictionary into a non-LowCardinality column. const auto & dict_type = lc_type ? lc_type->getDictionaryType() : column_type; @@ -583,6 +577,13 @@ static DataTypePtr getInternalType( getInternalType(arrow_map_type->item_type(), map_type->getValueType(), column_name, format_name)); } + if (arrow_type->id() == arrow::Type::UINT16 + && (isDate(column_type) || isDateTime(column_type) || isDate32(column_type) || isDateTime64(column_type))) + { + /// Read UInt16 as Date. It will allow correct conversion to DateTime further. + return std::make_shared(); + } + auto filter = [=](auto && elem) { auto which = WhichDataType(column_type); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 35541def2ad..0f502b36162 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -70,11 +70,8 @@ namespace DB {"Float32", arrow::float32()}, {"Float64", arrow::float64()}, - //{"Date", arrow::date64()}, - //{"Date", arrow::date32()}, - {"Date", arrow::uint16()}, // CHECK - //{"DateTime", arrow::date64()}, // BUG! 
saves as date32 - {"DateTime", arrow::uint32()}, + {"Date", arrow::uint16()}, /// uint16 is used instead of date32, because Apache Arrow cannot correctly serialize Date32Array. + {"DateTime", arrow::uint32()}, /// uint32 is used instead of date64, because we don't need milliseconds. {"String", arrow::binary()}, {"FixedString", arrow::binary()}, @@ -289,11 +286,11 @@ namespace DB auto value_type = assert_cast(array_builder->type().get())->value_type(); #define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \ - if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \ - { \ - fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \ - return; \ - } + if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \ + { \ + fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \ + return; \ + } FOR_ARROW_TYPES(DISPATCH) #undef DISPATCH @@ -361,7 +358,6 @@ namespace DB size_t end) { const auto & internal_data = assert_cast &>(*write_column).getData(); - //arrow::Date64Builder builder; arrow::UInt32Builder & builder = assert_cast(*array_builder); arrow::Status status; @@ -370,8 +366,6 @@ namespace DB if (null_bytemap && (*null_bytemap)[value_i]) status = builder.AppendNull(); else - /// Implicitly converts UInt16 to Int32 - //status = date_builder.Append(static_cast(internal_data[value_i]) * 1000); // now ms. 
TODO check other units status = builder.Append(internal_data[value_i]); checkStatus(status, write_column->getName(), format_name); @@ -526,14 +520,15 @@ namespace DB } } - static std::shared_ptr getArrowType(DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * is_column_nullable) + static std::shared_ptr getArrowType( + DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable) { if (column_type->isNullable()) { DataTypePtr nested_type = assert_cast(column_type.get())->getNestedType(); ColumnPtr nested_column = assert_cast(column.get())->getNestedColumnPtr(); - auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable); - *is_column_nullable = true; + auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); + *out_is_column_nullable = true; return arrow_type; } @@ -566,7 +561,7 @@ namespace DB { auto nested_type = assert_cast(column_type.get())->getNestedType(); auto nested_column = assert_cast(column.get())->getDataPtr(); - auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable); + auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); return arrow::list(nested_arrow_type); } @@ -578,8 +573,8 @@ namespace DB for (size_t i = 0; i != nested_types.size(); ++i) { String name = column_name + "." 
+ std::to_string(i); - auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, is_column_nullable); - nested_fields.push_back(std::make_shared(name, nested_arrow_type, *is_column_nullable)); + auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable); + nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); } return arrow::struct_(std::move(nested_fields)); } @@ -592,7 +587,7 @@ namespace DB const auto & indexes_column = lc_column->getIndexesPtr(); return arrow::dictionary( getArrowTypeForLowCardinalityIndexes(indexes_column), - getArrowType(nested_type, nested_column, column_name, format_name, is_column_nullable)); + getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable)); } if (isMap(column_type)) @@ -603,9 +598,8 @@ namespace DB const auto & columns = assert_cast(column.get())->getNestedData().getColumns(); return arrow::map( - getArrowType(key_type, columns[0], column_name, format_name, is_column_nullable), - getArrowType(val_type, columns[1], column_name, format_name, is_column_nullable) - ); + getArrowType(key_type, columns[0], column_name, format_name, out_is_column_nullable), + getArrowType(val_type, columns[1], column_name, format_name, out_is_column_nullable)); } const std::string type_name = column_type->getFamilyName(); @@ -618,8 +612,9 @@ namespace DB return arrow_type_it->second; } - throw Exception{fmt::format("The type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getName(), column_name, format_name), - ErrorCodes::UNKNOWN_TYPE}; + throw Exception(ErrorCodes::UNKNOWN_TYPE, + "The type '{}' of a column '{}' is not supported for conversion into {} data format.", + column_type->getName(), column_name, format_name); } CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, 
bool low_cardinality_as_dictionary_) diff --git a/tests/queries/0_stateless/01273_arrow.reference b/tests/queries/0_stateless/01273_arrow.reference index 0dc503f65e4..89eca82f8ef 100644 --- a/tests/queries/0_stateless/01273_arrow.reference +++ b/tests/queries/0_stateless/01273_arrow.reference @@ -41,7 +41,7 @@ converted: 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 diff: dest: -79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04 +79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00 80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 min: -128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 @@ -49,10 +49,10 @@ min: 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 max: --128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06 --108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06 +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06 80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12 -127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 
2004-02-03 04:05:06 dest from null: -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 diff --git a/tests/queries/0_stateless/01273_arrow.sh b/tests/queries/0_stateless/01273_arrow.sh index e5f6b58b3bb..ad8a6f0fdb9 100755 --- a/tests/queries/0_stateless/01273_arrow.sh +++ b/tests/queries/0_stateless/01273_arrow.sh @@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" # convert min 
type ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory" # convert max type -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')" @@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 
Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow" diff --git a/tests/queries/0_stateless/01273_arrow_stream.reference b/tests/queries/0_stateless/01273_arrow_stream.reference index 0dc503f65e4..89eca82f8ef 100644 --- a/tests/queries/0_stateless/01273_arrow_stream.reference +++ b/tests/queries/0_stateless/01273_arrow_stream.reference @@ -41,7 +41,7 @@ converted: 127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 
fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 diff: dest: -79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04 +79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00 80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 min: -128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 @@ -49,10 +49,10 @@ min: 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 max: --128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06 --108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06 +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06 80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12 -127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 2004-06-07 00:00:00 2004-02-03 04:05:06 dest from null: -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 diff --git a/tests/queries/0_stateless/01273_arrow_stream.sh b/tests/queries/0_stateless/01273_arrow_stream.sh index 6646cd90399..af5931a4bce 100755 --- 
a/tests/queries/0_stateless/01273_arrow_stream.sh +++ b/tests/queries/0_stateless/01273_arrow_stream.sh @@ -39,12 +39,12 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" # convert min type ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory" # convert max type -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, 
uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime('Europe/Moscow'), datetime DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')" @@ -85,8 +85,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime('Europe/Moscow'))) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 
Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" > "${CLICKHOUSE_TMP}"/arrow_all_types_5.arrow ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream"