added docs and tests, style check

This commit is contained in:
yariks5s 2023-10-25 10:37:05 +00:00
parent 4e09fb3e27
commit 2ab1ae42c1
15 changed files with 80 additions and 9 deletions

View File

@ -74,6 +74,7 @@ The supported formats are:
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [One](#data-format-one) | ✔ | ✗ |
| [Npy](#data-format-npy) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
@ -2445,6 +2446,26 @@ Result:
└──────────────┘
```
## Npy {#data-format-npy}
This format is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats the top-level dimension of the array as a sequence of rows in a single column, so each element along the first axis becomes one row.
**Example**
Query:
```sql
SELECT *
FROM file('example_array.npy', Npy)
```
Result:
```
┌─array─────────┐
│ [[1],[2],[3]] │
│ [[4],[5],[6]] │
└───────────────┘
```
## LineAsString {#lineasstring}
In this format, every line of input data is interpreted as a single string value. This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.

View File

@ -37,9 +37,8 @@ namespace DB
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
@ -119,16 +118,21 @@ std::vector<int> parseShape(String shapeString)
shapeString.erase(std::remove(shapeString.begin(), shapeString.end(), ')'), shapeString.end());
// Use a string stream to extract integers
std::istringstream ss(shapeString);
int value;
char comma; // to handle commas between values
String value;
std::vector<int> shape;
while (ss >> value) {
shape.push_back(value);
ss >> comma; // read the comma
size_t start = 0, end = 0;
while ((end = shapeString.find(',', start)) != std::string::npos)
{
shape.push_back(std::stoi(shapeString.substr(start, end - start)));
start = end + 1;
}
// Add the last token (or the only token if no delimiter is found)
if (start != shapeString.length())
shape.push_back(std::stoi(shapeString.substr(start)));
return shape;
}

View File

@ -0,0 +1,28 @@
1
2
3
1.1
2.2
3.3
1
a
c
1
a
c
[1,2,3]
[4,5,6]
[1.1,2.22,3.33]
[4.4,5.5,6.6]
['a','b','c']
['e','f','g']
['a','b','c']
['e','f','g']
[1]
[0]
[1]
[0]
[0,0,0]
[0,0,0]
[[1,2],[3,4]]
[[5,6],[7,8]]

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Selects every row from each sample .npy file under data_npy/ via the file()
# table function. No format name is passed to file(); only the path is given.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# The order of the fixtures is significant: query results are printed in this
# sequence, one fixture after another.
for npy_name in \
    one_dim one_dim_float one_dim_str one_dim_unicode \
    two_dim two_dim_float two_dim_str two_dim_unicode \
    two_dim_bool two_dim_null \
    three_dim
do
    $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/${npy_name}.npy')"
done

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.