mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
added docs and tests, style check
This commit is contained in:
parent
4e09fb3e27
commit
2ab1ae42c1
@ -74,6 +74,7 @@ The supported formats are:
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
| [ORC](#data-format-orc) | ✔ | ✔ |
|
||||
| [One](#data-format-one) | ✔ | ✗ |
|
||||
| [Npy](#data-format-npy) | ✔ | ✗ |
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
@ -2445,6 +2446,26 @@ Result:
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## Npy {#data-format-npy}
|
||||
|
||||
The Npy format is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. On import, each element along the array's top-level (first) dimension is read as a separate row of a single column.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
```sql
|
||||
SELECT *
|
||||
FROM file('example_array.npy', Npy)
|
||||
```
|
||||
|
||||
Result:
|
||||
```
|
||||
┌─array─────────┐
|
||||
│ [[1],[2],[3]] │
|
||||
│ [[4],[5],[6]] │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## LineAsString {#lineasstring}
|
||||
|
||||
In this format, every line of input data is interpreted as a single string value. This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.
|
||||
|
@ -37,9 +37,8 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -119,16 +118,21 @@ std::vector<int> parseShape(String shapeString)
|
||||
shapeString.erase(std::remove(shapeString.begin(), shapeString.end(), ')'), shapeString.end());
|
||||
|
||||
// Use a string stream to extract integers
|
||||
std::istringstream ss(shapeString);
|
||||
int value;
|
||||
char comma; // to handle commas between values
|
||||
String value;
|
||||
|
||||
std::vector<int> shape;
|
||||
|
||||
while (ss >> value) {
|
||||
shape.push_back(value);
|
||||
ss >> comma; // read the comma
|
||||
size_t start = 0, end = 0;
|
||||
|
||||
while ((end = shapeString.find(',', start)) != std::string::npos)
|
||||
{
|
||||
shape.push_back(std::stoi(shapeString.substr(start, end - start)));
|
||||
start = end + 1;
|
||||
}
|
||||
|
||||
// Add the last token (or the only token if no delimiter is found)
|
||||
if (start != shapeString.length())
|
||||
shape.push_back(std::stoi(shapeString.substr(start)));
|
||||
return shape;
|
||||
}
|
||||
|
||||
|
28
tests/queries/0_stateless/02895_npy_format.reference
Normal file
28
tests/queries/0_stateless/02895_npy_format.reference
Normal file
@ -0,0 +1,28 @@
|
||||
1
|
||||
2
|
||||
3
|
||||
1.1
|
||||
2.2
|
||||
3.3
|
||||
1
|
||||
a
|
||||
c
|
||||
1
|
||||
a
|
||||
c
|
||||
[1,2,3]
|
||||
[4,5,6]
|
||||
[1.1,2.22,3.33]
|
||||
[4.4,5.5,6.6]
|
||||
['a','b','c']
|
||||
['e','f','g']
|
||||
['a','b','c']
|
||||
['e','f','g']
|
||||
[1]
|
||||
[0]
|
||||
[1]
|
||||
[0]
|
||||
[0,0,0]
|
||||
[0,0,0]
|
||||
[[1,2],[3,4]]
|
||||
[[5,6],[7,8]]
|
18
tests/queries/0_stateless/02895_npy_format.sh
Executable file
18
tests/queries/0_stateless/02895_npy_format.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Run one SELECT per .npy fixture under data_npy/, covering one-, two- and
# three-dimensional arrays of several element kinds (per the fixture names).
# NOTE(review): no format argument is passed to file(), so the input format is
# presumably detected from the .npy extension - confirm.
# NOTE(review): the combined stdout is presumably compared against
# 02895_npy_format.reference - confirm against the test runner.
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim_float.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim_str.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim_unicode.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim_bool.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim_null.npy')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/three_dim.npy')"
|
BIN
tests/queries/0_stateless/data_npy/one_dim.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/one_dim.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/one_dim_float.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/one_dim_float.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/one_dim_str.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/one_dim_str.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/one_dim_unicode.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/one_dim_unicode.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/three_dim.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/three_dim.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim_bool.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim_bool.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim_float.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim_float.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim_null.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim_null.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim_str.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim_str.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/two_dim_unicode.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/two_dim_unicode.npy
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user