diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index cc3ea467ab1..f4b082c57ab 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2507,6 +2507,14 @@ Result: └───────────────┘ ``` +**Selecting Data** + +You can select data from a ClickHouse table and save them into some file in the Npy format by the following command: + +```bash +$ clickhouse-client --query="SELECT {column} FROM {some_table} FORMAT Npy" > {filename.npy} +``` + ## LineAsString {#lineasstring} In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index cc9cf380693..2070956883c 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -76,6 +76,8 @@ void registerInputFormatCustomSeparated(FormatFactory & factory); void registerOutputFormatCustomSeparated(FormatFactory & factory); void registerInputFormatCapnProto(FormatFactory & factory); void registerOutputFormatCapnProto(FormatFactory & factory); +void registerInputFormatNpy(FormatFactory & factory); +void registerOutputFormatNpy(FormatFactory & factory); /// Output only (presentational) formats. @@ -103,7 +105,6 @@ void registerInputFormatMySQLDump(FormatFactory & factory); void registerInputFormatParquetMetadata(FormatFactory & factory); void registerInputFormatDWARF(FormatFactory & factory); void registerInputFormatOne(FormatFactory & factory); -void registerInputFormatNpy(FormatFactory & factory); #if USE_HIVE void registerInputFormatHiveText(FormatFactory & factory); @@ -221,6 +222,8 @@ void registerFormats() registerOutputFormatAvro(factory); registerInputFormatArrow(factory); registerOutputFormatArrow(factory); + registerInputFormatNpy(factory); + registerOutputFormatNpy(factory); registerOutputFormatPretty(factory); registerOutputFormatPrettyCompact(factory); @@ -251,7 +254,6 @@ void registerFormats() registerInputFormatParquetMetadata(factory); registerInputFormatDWARF(factory); registerInputFormatOne(factory); - registerInputFormatNpy(factory); registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory); registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.cpp b/src/Processors/Formats/Impl/NpyOutputFormat.cpp new file mode 100644 index 00000000000..4a4af67b07e --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.cpp @@ -0,0 +1,223 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_MANY_COLUMNS; + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +template +void writeNumpyNumbers(const ColumnPtr & column, WriteBuffer & buf) +{ + const auto * number_column = assert_cast(column.get()); + for (size_t i = 0; i < number_column->size(); ++i) + writeBinaryLittleEndian(ValueType(number_column->getElement(i)), buf); +} + +template +void writeNumpyStrings(const ColumnPtr & column, size_t length, WriteBuffer & buf) +{ + const auto * string_column = assert_cast(column.get()); + for (size_t i = 0; i < string_column->size(); ++i) + buf.write(string_column->getDataAt(i).data, length); +} + +} + +String NpyOutputFormat::NumpyDataType::str() +{ + std::ostringstream dtype; + dtype << endianness << type << std::to_string(size); + return dtype.str(); +} + +NpyOutputFormat::NpyOutputFormat(WriteBuffer & out_, const Block & header_) : IOutputFormat(header_, out_) +{ + const auto & header = getPort(PortKind::Main).getHeader(); + auto data_types = header.getDataTypes(); + if (data_types.size() > 1) + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Expected single column for Npy output format, got {}", data_types.size()); + data_type = data_types[0]; +} + +void NpyOutputFormat::initialize(const ColumnPtr & column) +{ + auto type = data_type; + ColumnPtr nested_column = column; + while (type->getTypeId() == TypeIndex::Array) + { + const auto * array_column = assert_cast(nested_column.get()); + numpy_shape.push_back(array_column->getOffsets()[0]); + type = assert_cast(type.get())->getNestedType(); + nested_column = array_column->getDataPtr(); + } + + switch (type->getTypeId()) + { + case TypeIndex::Int8: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int8)); break; + case TypeIndex::Int16: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int16)); break; + case TypeIndex::Int32: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int32)); break; + case TypeIndex::Int64: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int64)); break; + case TypeIndex::UInt8: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt8)); break; + case TypeIndex::UInt16: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt16)); break; + case TypeIndex::UInt32: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt32)); break; + case TypeIndex::UInt64: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt64)); break; + case TypeIndex::Float32: numpy_data_type = NumpyDataType('<', 'f', sizeof(Float32)); break; + case TypeIndex::Float64: numpy_data_type = NumpyDataType('<', 'f', sizeof(Float64)); break; + case TypeIndex::FixedString: numpy_data_type = NumpyDataType('|', 'S', assert_cast(type.get())->getN()); break; + case TypeIndex::String: numpy_data_type = NumpyDataType('|', 'S', 0); break; + default: + has_exception = true; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for Npy output format", type->getName()); + } + nested_data_type = type; +} + +void NpyOutputFormat::consume(Chunk chunk) +{ + if (!has_exception) + { + num_rows += chunk.getNumRows(); + auto column = chunk.getColumns()[0]; + + if (!is_initialized) + { + initialize(column); + is_initialized = true; + } + + /// check shape + auto type = data_type; + ColumnPtr nested_column = column; + int dim = 0; + while (type->getTypeId() == TypeIndex::Array) + { + const auto * array_column = assert_cast(nested_column.get()); + const auto & array_offset = array_column->getOffsets(); + for (size_t i = 1; i < array_offset.size(); ++i) + { + if (array_offset[i] - array_offset[i - 1] != numpy_shape[dim]) + { + has_exception = true; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse doesn't support object types, cannot format ragged nested sequences (which is a list of arrays with different shapes)"); + } + } + type = assert_cast(type.get())->getNestedType(); + nested_column = array_column->getDataPtr(); + dim++; + } + + /// for type String, get maximum string length + if (type->getTypeId() == TypeIndex::String) + { + const auto & string_offsets = assert_cast(nested_column.get())->getOffsets(); + for (size_t i = 0; i < string_offsets.size(); ++i) + { + size_t string_length = static_cast(string_offsets[i] - 1 - string_offsets[i - 1]); + numpy_data_type.size = numpy_data_type.size > string_length ? numpy_data_type.size : string_length; + } + } + + columns.push_back(nested_column); + } +} + +void NpyOutputFormat::finalizeImpl() +{ + if (!has_exception) + { + writeHeader(); + writeColumns(); + } +} + +void NpyOutputFormat::writeHeader() +{ + std::ostringstream static_header; + static_header << MAGIC_STRING << MAJOR_VERSION << MINOR_VERSION; + String static_header_str = static_header.str(); + + std::ostringstream shape; + shape << '(' << std::to_string(num_rows) << ','; + for (auto dim : numpy_shape) + shape << std::to_string(dim) << ','; + shape << ')'; + + std::ostringstream dict; + dict << "{'descr':'" << numpy_data_type.str() << "','fortran_order':False,'shape':" << shape.str() << ",}"; + String dict_str = dict.str(); + String padding_str = "\n"; + + /// completes the length of the header, which is divisible by 64. + size_t dict_length = dict_str.length() + 1; + size_t header_length = static_header_str.length() + sizeof(UInt32) + dict_length; + if (header_length % 64) + { + header_length = ((header_length / 64) + 1) * 64; + dict_length = header_length - static_header_str.length() - sizeof(UInt32); + padding_str = std::string(dict_length - dict_str.length(), '\x20'); + padding_str.back() = '\n'; + } + + out.write(static_header_str.data(), static_header_str.length()); + writeBinaryLittleEndian(assert_cast(dict_length), out); + out.write(dict_str.data(), dict_str.length()); + out.write(padding_str.data(), padding_str.length()); +} + +void NpyOutputFormat::writeColumns() +{ + for (auto column : columns) + { + switch (nested_data_type->getTypeId()) + { + case TypeIndex::Int8: writeNumpyNumbers(column, out); break; + case TypeIndex::Int16: writeNumpyNumbers(column, out); break; + case TypeIndex::Int32: writeNumpyNumbers(column, out); break; + case TypeIndex::Int64: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt8: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt16: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt32: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt64: writeNumpyNumbers(column, out); break; + case TypeIndex::Float32: writeNumpyNumbers(column, out); break; + case TypeIndex::Float64: writeNumpyNumbers(column, out); break; + case TypeIndex::FixedString: writeNumpyStrings(column, numpy_data_type.size, out); break; + case TypeIndex::String: writeNumpyStrings(column, numpy_data_type.size, out); break; + default: break; + } + } +} + +void registerOutputFormatNpy(FormatFactory & factory) +{ + factory.registerOutputFormat("Npy",[]( + WriteBuffer & buf, + const Block & sample, + const FormatSettings &) + { + return std::make_shared(buf, sample); + }); + factory.markFormatHasNoAppendSupport("Npy"); +} + +} diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.h b/src/Processors/Formats/Impl/NpyOutputFormat.h new file mode 100644 index 00000000000..083aa928b7c --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +/** Stream for output data in Npy format. + * https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html + */ +class NpyOutputFormat : public IOutputFormat +{ +public: + NpyOutputFormat(WriteBuffer & out_, const Block & header_); + + String getName() const override { return "NpyOutputFormat"; } + + String getContentType() const override { return "application/octet-stream"; } + +private: + struct NumpyDataType + { + char endianness; + char type; + size_t size; + + NumpyDataType() = default; + NumpyDataType(char endianness_, char type_, size_t size_) + : endianness(endianness_), type(type_), size(size_) {} + String str(); + }; + + void initialize(const ColumnPtr & column); + void consume(Chunk) override; + void finalizeImpl() override; + void writeHeader(); + void writeColumns(); + + bool is_initialized = false; + bool has_exception = false; + + DataTypePtr data_type; + DataTypePtr nested_data_type; + NumpyDataType numpy_data_type; + UInt64 num_rows = 0; + std::vector numpy_shape; + Columns columns; + + /// static header (version 3.0) + constexpr static auto MAGIC_STRING = "\x93NUMPY"; + constexpr static auto MAJOR_VERSION = '\x03'; + constexpr static auto MINOR_VERSION = '\x00'; +}; + +} diff --git a/tests/queries/0_stateless/02895_npy_output_format.reference b/tests/queries/0_stateless/02895_npy_output_format.reference new file mode 100644 index 00000000000..b599f1dceea --- /dev/null +++ b/tests/queries/0_stateless/02895_npy_output_format.reference @@ -0,0 +1,60 @@ +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0.1 +0.1 +0.1 +0.01 +0.01 +0.01 +npy +npy +npy +npy +npy +npy +array Int8 +array Int16 +array Int32 +array Int64 +array UInt8 +array UInt16 +array UInt32 +array UInt64 +array Float32 +array Float64 +array String +array String +[[[1],[2]],[[3],[4]]] +[[[1],[2]],[[3],[4]]] +[[[1],[2]],[[3],[4]]] +[[0.1],[0.2]] +[[0.1],[0.2]] +[[0.1],[0.2]] +[['abb','bbc'],['ccc','dddd']] +[['abb','bbc'],['ccc','dddd']] +[['abb','bbc'],['ccc','dddd']] +array Array(Array(Array(Int8))) +array Array(Array(Float64)) +array Array(Array(String)) diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh new file mode 100755 index 00000000000..e5226e88a8d --- /dev/null +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ + +${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS npy_output_02895;" +${CLICKHOUSE_CLIENT} -q "CREATE DATABASE IF NOT EXISTS npy_output_02895;" + +### test common type +${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS npy_output_02895.common +( + i1 Int8, + i2 Int16, + i4 Int32, + i8 Int64, + u1 UInt8, + u2 UInt16, + u4 UInt32, + u8 UInt64, + f4 Float32, + f8 Float64, + fs FixedString(10), + s String, + unknow Int128 +) Engine = MergeTree ORDER BY i1;" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO npy_output_02895.common VALUES (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1), (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1), (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1);" + +${CLICKHOUSE_CLIENT} -n -q "SELECT * FROM npy_output_02895.common FORMAT Npy; -- { clientError TOO_MANY_COLUMNS }" +${CLICKHOUSE_CLIENT} -n -q "SELECT unknow FROM npy_output_02895.common FORMAT Npy; -- { clientError BAD_ARGUMENTS }" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int8.npy') SELECT i1 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int16.npy') SELECT i2 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int32.npy') SELECT i4 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int64.npy') SELECT i8 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint8.npy') SELECT u1 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint16.npy') SELECT u2 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint32.npy') SELECT u4 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint64.npy') SELECT u8 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float32.npy') SELECT f4 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float64.npy') SELECT f8 FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_fixedstring.npy') SELECT fs FROM npy_output_02895.common;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_string.npy') SELECT s FROM npy_output_02895.common;" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int8.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int16.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int32.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int64.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint8.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint16.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint32.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint64.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float32.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float64.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_fixedstring.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_string.npy');" + +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int8.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int16.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int32.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_int64.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint8.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint16.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint32.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_uint64.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float32.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_float64.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_fixedstring.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_common_string.npy');" + +### test nested type +${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS npy_output_02895.nested +( + i4 Array(Array(Array(Int8))), + f8 Array(Array(Float64)), + s Array(Array(String)), + unknow Array(Int128), + ragged_1 Array(Array(Int32)), + ragged_2 Array(Array(Int32)) +) Engine = MergeTree ORDER BY i4;" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO npy_output_02895.nested VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1, 2], [3, 4]], [[1, 2], [3]]), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1, 2, 3], [4]], [[1, 2], [3]]), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1], [2, 3, 4]], [[1, 2], [3]]);" + +${CLICKHOUSE_CLIENT} -n -q "SELECT * FROM npy_output_02895.nested FORMAT Npy; -- { clientError TOO_MANY_COLUMNS }" +${CLICKHOUSE_CLIENT} -n -q "SELECT unknow FROM npy_output_02895.nested FORMAT Npy; -- { clientError BAD_ARGUMENTS }" +${CLICKHOUSE_CLIENT} -n -q "SELECT ragged_1 FROM npy_output_02895.nested FORMAT Npy; -- { clientError ILLEGAL_COLUMN }" +${CLICKHOUSE_CLIENT} -n -q "SELECT ragged_2 FROM npy_output_02895.nested FORMAT Npy; -- { clientError ILLEGAL_COLUMN }" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_int32.npy') SELECT i4 FROM npy_output_02895.nested;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_float64.npy') SELECT f8 FROM npy_output_02895.nested;" +${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_string.npy') SELECT s FROM npy_output_02895.nested;" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_int32.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_float64.npy');" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_string.npy');" + +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_int32.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_float64.npy');" +${CLICKHOUSE_CLIENT} -q "DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_string.npy');" + +${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS npy_output_02895;" + +rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}