mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
[feature] add npy output format
This commit is contained in:
parent
08f700118f
commit
c0174fa17e
@ -2507,6 +2507,14 @@ Result:
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
**Selecting Data**
|
||||
|
||||
You can select data from a ClickHouse table and save it to a file in the Npy format using the following command:
|
||||
|
||||
```bash
|
||||
$ clickhouse-client --query="SELECT {column} FROM {some_table} FORMAT Npy" > {filename.npy}
|
||||
```
|
||||
|
||||
## LineAsString {#lineasstring}
|
||||
|
||||
In this format, every line of input data is interpreted as a single string value. This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.
|
||||
|
@ -76,6 +76,8 @@ void registerInputFormatCustomSeparated(FormatFactory & factory);
|
||||
void registerOutputFormatCustomSeparated(FormatFactory & factory);
|
||||
void registerInputFormatCapnProto(FormatFactory & factory);
|
||||
void registerOutputFormatCapnProto(FormatFactory & factory);
|
||||
void registerInputFormatNpy(FormatFactory & factory);
|
||||
void registerOutputFormatNpy(FormatFactory & factory);
|
||||
|
||||
/// Output only (presentational) formats.
|
||||
|
||||
@ -103,7 +105,6 @@ void registerInputFormatMySQLDump(FormatFactory & factory);
|
||||
void registerInputFormatParquetMetadata(FormatFactory & factory);
|
||||
void registerInputFormatDWARF(FormatFactory & factory);
|
||||
void registerInputFormatOne(FormatFactory & factory);
|
||||
void registerInputFormatNpy(FormatFactory & factory);
|
||||
|
||||
#if USE_HIVE
|
||||
void registerInputFormatHiveText(FormatFactory & factory);
|
||||
@ -221,6 +222,8 @@ void registerFormats()
|
||||
registerOutputFormatAvro(factory);
|
||||
registerInputFormatArrow(factory);
|
||||
registerOutputFormatArrow(factory);
|
||||
registerInputFormatNpy(factory);
|
||||
registerOutputFormatNpy(factory);
|
||||
|
||||
registerOutputFormatPretty(factory);
|
||||
registerOutputFormatPrettyCompact(factory);
|
||||
@ -251,7 +254,6 @@ void registerFormats()
|
||||
registerInputFormatParquetMetadata(factory);
|
||||
registerInputFormatDWARF(factory);
|
||||
registerInputFormatOne(factory);
|
||||
registerInputFormatNpy(factory);
|
||||
|
||||
registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory);
|
||||
registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory);
|
||||
|
223
src/Processors/Formats/Impl/NpyOutputFormat.cpp
Normal file
223
src/Processors/Formats/Impl/NpyOutputFormat.cpp
Normal file
@ -0,0 +1,223 @@
|
||||
#include <Processors/Formats/Impl/NpyOutputFormat.h>
|
||||
|
||||
#include <Core/TypeId.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_MANY_COLUMNS;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename ColumnType, typename ValueType>
|
||||
void writeNumpyNumbers(const ColumnPtr & column, WriteBuffer & buf)
|
||||
{
|
||||
const auto * number_column = assert_cast<const ColumnType *>(column.get());
|
||||
for (size_t i = 0; i < number_column->size(); ++i)
|
||||
writeBinaryLittleEndian(ValueType(number_column->getElement(i)), buf);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
void writeNumpyStrings(const ColumnPtr & column, size_t length, WriteBuffer & buf)
|
||||
{
|
||||
const auto * string_column = assert_cast<const ColumnType *>(column.get());
|
||||
for (size_t i = 0; i < string_column->size(); ++i)
|
||||
buf.write(string_column->getDataAt(i).data, length);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Builds the textual type descriptor: endianness character, type character, then the size in decimal.
String NpyOutputFormat::NumpyDataType::str()
{
    String descr;
    descr += endianness;
    descr += type;
    descr += std::to_string(size);
    return descr;
}
|
||||
|
||||
/// Creates the format for a header that must describe exactly one column; the type of that
/// column is remembered in `data_type`.
/// Throws TOO_MANY_COLUMNS when the header does not contain exactly one column.
NpyOutputFormat::NpyOutputFormat(WriteBuffer & out_, const Block & header_) : IOutputFormat(header_, out_)
{
    const auto & header = getPort(PortKind::Main).getHeader();
    const auto data_types = header.getDataTypes();

    /// `data_types[0]` below needs exactly one element, so an empty header is rejected as well.
    if (data_types.size() != 1)
        throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Expected single column for Npy output format, got {}", data_types.size());

    data_type = data_types[0];
}
|
||||
|
||||
void NpyOutputFormat::initialize(const ColumnPtr & column)
|
||||
{
|
||||
auto type = data_type;
|
||||
ColumnPtr nested_column = column;
|
||||
while (type->getTypeId() == TypeIndex::Array)
|
||||
{
|
||||
const auto * array_column = assert_cast<const ColumnArray *>(nested_column.get());
|
||||
numpy_shape.push_back(array_column->getOffsets()[0]);
|
||||
type = assert_cast<const DataTypeArray *>(type.get())->getNestedType();
|
||||
nested_column = array_column->getDataPtr();
|
||||
}
|
||||
|
||||
switch (type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int8)); break;
|
||||
case TypeIndex::Int16: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int16)); break;
|
||||
case TypeIndex::Int32: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int32)); break;
|
||||
case TypeIndex::Int64: numpy_data_type = NumpyDataType('<', 'i', sizeof(Int64)); break;
|
||||
case TypeIndex::UInt8: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt8)); break;
|
||||
case TypeIndex::UInt16: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt16)); break;
|
||||
case TypeIndex::UInt32: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt32)); break;
|
||||
case TypeIndex::UInt64: numpy_data_type = NumpyDataType('<', 'u', sizeof(UInt64)); break;
|
||||
case TypeIndex::Float32: numpy_data_type = NumpyDataType('<', 'f', sizeof(Float32)); break;
|
||||
case TypeIndex::Float64: numpy_data_type = NumpyDataType('<', 'f', sizeof(Float64)); break;
|
||||
case TypeIndex::FixedString: numpy_data_type = NumpyDataType('|', 'S', assert_cast<const DataTypeFixedString *>(type.get())->getN()); break;
|
||||
case TypeIndex::String: numpy_data_type = NumpyDataType('|', 'S', 0); break;
|
||||
default:
|
||||
has_exception = true;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for Npy output format", type->getName());
|
||||
}
|
||||
nested_data_type = type;
|
||||
}
|
||||
|
||||
/// Accepts one chunk of the single output column.
///
/// Nothing is written here: the innermost data column of every chunk is kept in `columns`
/// and emitted by finalizeImpl(). On the first chunk the layout is derived via initialize().
/// For every chunk:
///  - each Array level is checked to contain only arrays of the size recorded in `numpy_shape`;
///  - for String data the descriptor size is grown to the longest string seen so far.
///
/// Throws ILLEGAL_COLUMN (and raises `has_exception`) for ragged arrays. Once `has_exception`
/// is set, further chunks are ignored.
void NpyOutputFormat::consume(Chunk chunk)
{
    if (has_exception)
        return;

    num_rows += chunk.getNumRows();
    auto column = chunk.getColumns()[0];

    if (!is_initialized)
    {
        initialize(column);
        is_initialized = true;
    }

    /// check shape
    auto type = data_type;
    ColumnPtr nested_column = column;
    size_t dim = 0;
    while (type->getTypeId() == TypeIndex::Array)
    {
        const auto * array_column = assert_cast<const ColumnArray *>(nested_column.get());
        const auto & array_offsets = array_column->getOffsets();

        /// Offsets are cumulative, so the size of array `i` is the difference to the previous offset.
        /// The loop starts at the very first array: for chunks after the first one, its size has not
        /// been compared with the recorded shape anywhere else.
        UInt64 previous_offset = 0;
        for (size_t i = 0; i < array_offsets.size(); ++i)
        {
            const UInt64 current_offset = array_offsets[i];
            if (current_offset - previous_offset != numpy_shape[dim])
            {
                has_exception = true;
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse doesn't support object types, cannot format ragged nested sequences (which is a list of arrays with different shapes)");
            }
            previous_offset = current_offset;
        }

        type = assert_cast<const DataTypeArray *>(type.get())->getNestedType();
        nested_column = array_column->getDataPtr();
        ++dim;
    }

    /// for type String, get maximum string length
    if (type->getTypeId() == TypeIndex::String)
    {
        const auto & string_offsets = assert_cast<const ColumnString *>(nested_column.get())->getOffsets();

        UInt64 previous_offset = 0;
        for (size_t i = 0; i < string_offsets.size(); ++i)
        {
            /// NOTE(review): the `- 1` presumably excludes a terminating byte stored after each string - confirm.
            const size_t string_length = static_cast<size_t>(string_offsets[i] - 1 - previous_offset);
            previous_offset = string_offsets[i];

            if (string_length > numpy_data_type.size)
                numpy_data_type.size = string_length;
        }
    }

    columns.push_back(nested_column);
}
|
||||
|
||||
void NpyOutputFormat::finalizeImpl()
|
||||
{
|
||||
if (!has_exception)
|
||||
{
|
||||
writeHeader();
|
||||
writeColumns();
|
||||
}
|
||||
}
|
||||
|
||||
void NpyOutputFormat::writeHeader()
|
||||
{
|
||||
std::ostringstream static_header;
|
||||
static_header << MAGIC_STRING << MAJOR_VERSION << MINOR_VERSION;
|
||||
String static_header_str = static_header.str();
|
||||
|
||||
std::ostringstream shape;
|
||||
shape << '(' << std::to_string(num_rows) << ',';
|
||||
for (auto dim : numpy_shape)
|
||||
shape << std::to_string(dim) << ',';
|
||||
shape << ')';
|
||||
|
||||
std::ostringstream dict;
|
||||
dict << "{'descr':'" << numpy_data_type.str() << "','fortran_order':False,'shape':" << shape.str() << ",}";
|
||||
String dict_str = dict.str();
|
||||
String padding_str = "\n";
|
||||
|
||||
/// completes the length of the header, which is divisible by 64.
|
||||
size_t dict_length = dict_str.length() + 1;
|
||||
size_t header_length = static_header_str.length() + sizeof(UInt32) + dict_length;
|
||||
if (header_length % 64)
|
||||
{
|
||||
header_length = ((header_length / 64) + 1) * 64;
|
||||
dict_length = header_length - static_header_str.length() - sizeof(UInt32);
|
||||
padding_str = std::string(dict_length - dict_str.length(), '\x20');
|
||||
padding_str.back() = '\n';
|
||||
}
|
||||
|
||||
out.write(static_header_str.data(), static_header_str.length());
|
||||
writeBinaryLittleEndian(assert_cast<UInt32>(dict_length), out);
|
||||
out.write(dict_str.data(), dict_str.length());
|
||||
out.write(padding_str.data(), padding_str.length());
|
||||
}
|
||||
|
||||
void NpyOutputFormat::writeColumns()
|
||||
{
|
||||
for (auto column : columns)
|
||||
{
|
||||
switch (nested_data_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8: writeNumpyNumbers<ColumnInt8, Int8>(column, out); break;
|
||||
case TypeIndex::Int16: writeNumpyNumbers<ColumnInt16, Int16>(column, out); break;
|
||||
case TypeIndex::Int32: writeNumpyNumbers<ColumnInt32, Int32>(column, out); break;
|
||||
case TypeIndex::Int64: writeNumpyNumbers<ColumnInt64, Int64>(column, out); break;
|
||||
case TypeIndex::UInt8: writeNumpyNumbers<ColumnUInt8, UInt8>(column, out); break;
|
||||
case TypeIndex::UInt16: writeNumpyNumbers<ColumnUInt16, UInt16>(column, out); break;
|
||||
case TypeIndex::UInt32: writeNumpyNumbers<ColumnUInt32, UInt32>(column, out); break;
|
||||
case TypeIndex::UInt64: writeNumpyNumbers<ColumnUInt64, UInt64>(column, out); break;
|
||||
case TypeIndex::Float32: writeNumpyNumbers<ColumnFloat32, Float32>(column, out); break;
|
||||
case TypeIndex::Float64: writeNumpyNumbers<ColumnFloat64, Float64>(column, out); break;
|
||||
case TypeIndex::FixedString: writeNumpyStrings<ColumnFixedString>(column, numpy_data_type.size, out); break;
|
||||
case TypeIndex::String: writeNumpyStrings<ColumnString>(column, numpy_data_type.size, out); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the "Npy" output format in the factory and marks it as not supporting appends.
void registerOutputFormatNpy(FormatFactory & factory)
{
    /// Format settings are accepted by the factory interface but not used by this format.
    auto create_npy_output = [](WriteBuffer & buf, const Block & sample, const FormatSettings &)
    {
        return std::make_shared<NpyOutputFormat>(buf, sample);
    };

    factory.registerOutputFormat("Npy", create_npy_output);
    factory.markFormatHasNoAppendSupport("Npy");
}
|
||||
|
||||
}
|
66
src/Processors/Formats/Impl/NpyOutputFormat.h
Normal file
66
src/Processors/Formats/Impl/NpyOutputFormat.h
Normal file
@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteBufferFromVector.h>
|
||||
#include <Processors/Formats/IRowOutputFormat.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Stream for output data in Npy format.
|
||||
* https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html
|
||||
*/
|
||||
class NpyOutputFormat : public IOutputFormat
|
||||
{
|
||||
public:
|
||||
NpyOutputFormat(WriteBuffer & out_, const Block & header_);
|
||||
|
||||
String getName() const override { return "NpyOutputFormat"; }
|
||||
|
||||
String getContentType() const override { return "application/octet-stream"; }
|
||||
|
||||
private:
|
||||
struct NumpyDataType
|
||||
{
|
||||
char endianness;
|
||||
char type;
|
||||
size_t size;
|
||||
|
||||
NumpyDataType() = default;
|
||||
NumpyDataType(char endianness_, char type_, size_t size_)
|
||||
: endianness(endianness_), type(type_), size(size_) {}
|
||||
String str();
|
||||
};
|
||||
|
||||
void initialize(const ColumnPtr & column);
|
||||
void consume(Chunk) override;
|
||||
void finalizeImpl() override;
|
||||
void writeHeader();
|
||||
void writeColumns();
|
||||
|
||||
bool is_initialized = false;
|
||||
bool has_exception = false;
|
||||
|
||||
DataTypePtr data_type;
|
||||
DataTypePtr nested_data_type;
|
||||
NumpyDataType numpy_data_type;
|
||||
UInt64 num_rows = 0;
|
||||
std::vector<UInt64> numpy_shape;
|
||||
Columns columns;
|
||||
|
||||
/// static header (version 3.0)
|
||||
constexpr static auto MAGIC_STRING = "\x93NUMPY";
|
||||
constexpr static auto MAJOR_VERSION = '\x03';
|
||||
constexpr static auto MINOR_VERSION = '\x00';
|
||||
};
|
||||
|
||||
}
|
60
tests/queries/0_stateless/02895_npy_output_format.reference
Normal file
60
tests/queries/0_stateless/02895_npy_output_format.reference
Normal file
@ -0,0 +1,60 @@
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
-1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0.1
|
||||
0.1
|
||||
0.1
|
||||
0.01
|
||||
0.01
|
||||
0.01
|
||||
npy
|
||||
npy
|
||||
npy
|
||||
npy
|
||||
npy
|
||||
npy
|
||||
array Int8
|
||||
array Int16
|
||||
array Int32
|
||||
array Int64
|
||||
array UInt8
|
||||
array UInt16
|
||||
array UInt32
|
||||
array UInt64
|
||||
array Float32
|
||||
array Float64
|
||||
array String
|
||||
array String
|
||||
[[[1],[2]],[[3],[4]]]
|
||||
[[[1],[2]],[[3],[4]]]
|
||||
[[[1],[2]],[[3],[4]]]
|
||||
[[0.1],[0.2]]
|
||||
[[0.1],[0.2]]
|
||||
[[0.1],[0.2]]
|
||||
[['abb','bbc'],['ccc','dddd']]
|
||||
[['abb','bbc'],['ccc','dddd']]
|
||||
[['abb','bbc'],['ccc','dddd']]
|
||||
array Array(Array(Array(Int8)))
|
||||
array Array(Array(Float64))
|
||||
array Array(Array(String))
|
110
tests/queries/0_stateless/02895_npy_output_format.sh
Executable file
110
tests/queries/0_stateless/02895_npy_output_format.sh
Executable file
@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env bash
# Tags: no-parallel

# Test for the Npy output format:
#   1. unsupported selections (several columns, Int128, ragged arrays) must fail on the client;
#   2. every supported scalar type and several nested array types are written to .npy files
#      through the file() table function, then read back (SELECT) and described (DESC).

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# The directory is taken from the text of the exception raised for a file that does not exist
# (9th field of the message, with the file name stripped).
user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

# Abort right here if either part is empty: otherwise the rm -rf below would operate on "/<name>/*".
data_dir="${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}"

mkdir -p "${data_dir}/"
rm -rf "${data_dir:?}"/*
chmod 777 "${data_dir}/"

${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS npy_output_02895;"
${CLICKHOUSE_CLIENT} -q "CREATE DATABASE IF NOT EXISTS npy_output_02895;"

### test common type
${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS npy_output_02895.common
(
i1 Int8,
i2 Int16,
i4 Int32,
i8 Int64,
u1 UInt8,
u2 UInt16,
u4 UInt32,
u8 UInt64,
f4 Float32,
f8 Float64,
fs FixedString(10),
s String,
unknow Int128
) Engine = MergeTree ORDER BY i1;"

${CLICKHOUSE_CLIENT} -q "INSERT INTO npy_output_02895.common VALUES (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1), (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1), (-1,-1,-1,-1,1,1,1,1,0.1,0.01,'npy','npy',1);"

${CLICKHOUSE_CLIENT} -n -q "SELECT * FROM npy_output_02895.common FORMAT Npy; -- { clientError TOO_MANY_COLUMNS }"
${CLICKHOUSE_CLIENT} -n -q "SELECT unknow FROM npy_output_02895.common FORMAT Npy; -- { clientError BAD_ARGUMENTS }"

# File name suffix and source column, index by index.
common_names=(int8 int16 int32 int64 uint8 uint16 uint32 uint64 float32 float64 fixedstring string)
common_columns=(i1 i2 i4 i8 u1 u2 u4 u8 f4 f8 fs s)

for idx in "${!common_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${data_dir}/02895_common_${common_names[$idx]}.npy') SELECT ${common_columns[$idx]} FROM npy_output_02895.common;"
done

for name in "${common_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${data_dir}/02895_common_${name}.npy');"
done

for name in "${common_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "DESC file('${data_dir}/02895_common_${name}.npy');"
done

### test nested type
${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS npy_output_02895.nested
(
i4 Array(Array(Array(Int8))),
f8 Array(Array(Float64)),
s Array(Array(String)),
unknow Array(Int128),
ragged_1 Array(Array(Int32)),
ragged_2 Array(Array(Int32))
) Engine = MergeTree ORDER BY i4;"

${CLICKHOUSE_CLIENT} -q "INSERT INTO npy_output_02895.nested VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1, 2], [3, 4]], [[1, 2], [3]]), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1, 2, 3], [4]], [[1, 2], [3]]), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], [['a', 'bb'], ['ccc', 'dddd']], [1, 2], [[1], [2, 3, 4]], [[1, 2], [3]]);"

${CLICKHOUSE_CLIENT} -n -q "SELECT * FROM npy_output_02895.nested FORMAT Npy; -- { clientError TOO_MANY_COLUMNS }"
${CLICKHOUSE_CLIENT} -n -q "SELECT unknow FROM npy_output_02895.nested FORMAT Npy; -- { clientError BAD_ARGUMENTS }"
${CLICKHOUSE_CLIENT} -n -q "SELECT ragged_1 FROM npy_output_02895.nested FORMAT Npy; -- { clientError ILLEGAL_COLUMN }"
${CLICKHOUSE_CLIENT} -n -q "SELECT ragged_2 FROM npy_output_02895.nested FORMAT Npy; -- { clientError ILLEGAL_COLUMN }"

# File name suffix and source column, index by index (file names are kept as they were).
nested_names=(int32 float64 string)
nested_columns=(i4 f8 s)

for idx in "${!nested_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "INSERT INTO TABLE FUNCTION file('${data_dir}/02895_nested_${nested_names[$idx]}.npy') SELECT ${nested_columns[$idx]} FROM npy_output_02895.nested;"
done

for name in "${nested_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "SELECT * FROM file('${data_dir}/02895_nested_${name}.npy');"
done

for name in "${nested_names[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "DESC file('${data_dir}/02895_nested_${name}.npy');"
done

${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS npy_output_02895;"

rm -rf "${data_dir:?}"
|
Loading…
Reference in New Issue
Block a user