mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 13:02:00 +00:00
Return include_indices, fix arrays and add more tests
This commit is contained in:
parent
46a7cc5f1d
commit
484cac6193
@ -42,7 +42,7 @@ Chunk ORCBlockInputFormat::generate()
|
||||
return res;
|
||||
|
||||
std::shared_ptr<arrow::RecordBatch> batch_result;
|
||||
arrow::Status batch_status = file_reader->ReadStripe(stripe_current, &batch_result);
|
||||
arrow::Status batch_status = file_reader->ReadStripe(stripe_current, include_indices, &batch_result);
|
||||
if (!batch_status.ok())
|
||||
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||
"Error while reading batch of ORC data: {}", batch_status.ToString());
|
||||
@ -63,14 +63,39 @@ void ORCBlockInputFormat::resetParser()
|
||||
IInputFormat::resetParser();
|
||||
|
||||
file_reader.reset();
|
||||
include_indices.clear();
|
||||
stripe_current = 0;
|
||||
}
|
||||
|
||||
size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
||||
{
|
||||
if (type->id() == arrow::Type::LIST)
|
||||
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type()) + 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void ORCBlockInputFormat::prepareReader()
|
||||
{
|
||||
THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(in), arrow::default_memory_pool(), &file_reader));
|
||||
stripe_total = file_reader->NumberOfStripes();
|
||||
stripe_current = 0;
|
||||
|
||||
std::shared_ptr<arrow::Schema> schema;
|
||||
THROW_ARROW_NOT_OK(file_reader->ReadSchema(&schema));
|
||||
|
||||
int index = 0;
|
||||
for (int i = 0; i < schema->num_fields(); ++i)
|
||||
{
|
||||
if (getPort().getHeader().has(schema->field(i)->name()))
|
||||
{
|
||||
/// LIST type require 2 indices, so we should recursively
|
||||
/// count the number of indices we need for this type.
|
||||
int indexes_count = countIndicesForType(schema->field(i)->type());
|
||||
for (int j = 0; j != indexes_count; ++j)
|
||||
include_indices.push_back(index++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void registerInputFormatProcessorORC(FormatFactory &factory)
|
||||
|
@ -30,6 +30,9 @@ private:
|
||||
|
||||
int stripe_current = 0;
|
||||
|
||||
// indices of columns to read from ORC file
|
||||
std::vector<int> include_indices;
|
||||
|
||||
void prepareReader();
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,4 @@
|
||||
[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00']
|
||||
[] [] [] [] [] [] [] [] [] [] [] [] [] []
|
||||
[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00']
|
||||
[] [] [] [] [] [] [] [] [] [] [] [] [] []
|
17
tests/queries/0_stateless/01273_arrow_arrays_load.sh
Executable file
17
tests/queries/0_stateless/01273_arrow_arrays_load.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Round-trip test for the Arrow format with array columns:
# the table is dumped to an Arrow file, loaded back into the same table,
# and then selected, so every inserted row is expected to appear twice.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# Drop the table this test actually creates, so a leftover from a previous run cannot break CREATE TABLE.
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_arrays"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_arrays (arr1 Array(Int8), arr2 Array(UInt8), arr3 Array(Int16), arr4 Array(UInt16), arr5 Array(Int32), arr6 Array(UInt32), arr7 Array(Int64), arr8 Array(UInt64), arr9 Array(String), arr10 Array(FixedString(4)), arr11 Array(Float32), arr12 Array(Float64), arr13 Array(Date), arr14 Array(Datetime)) ENGINE=Memory()"

${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_arrays VALUES ([1,-2,3],[1,2,3],[100,-200,300],[100,200,300],[10000000,-20000000,30000000],[10000000,2000000,3000000],[100000000000000,-200000000000,3000000000000],[100000000000000,20000000000000,3000000000000],['Some string','Some string','Some string'],['0000','1111','2222'],[42.42,424.2,0.4242],[424242.424242,4242042420.242424,42],['2000-01-01','2001-01-01','2002-01-01'],['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00']),([],[],[],[],[],[],[],[],[],[],[],[],[],[])"

${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_arrays FORMAT Arrow" > "${CLICKHOUSE_TMP}"/arrays.arrow

# Feed the dump back through stdin; redirection avoids the extra cat process.
${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_arrays FORMAT Arrow" < "${CLICKHOUSE_TMP}"/arrays.arrow

${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_arrays"
${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_arrays"
|
@ -0,0 +1,2 @@
|
||||
[[[1,2,3],[1,2,3]],[[1,2,3]],[[],[1,2,3]]] [[['Some string','Some string'],[]],[['Some string']],[[]]] [[NULL,1,2],[NULL],[1,2],[]] [['Some string',NULL,'Some string'],[NULL],[]]
|
||||
[[[1,2,3],[1,2,3]],[[1,2,3]],[[],[1,2,3]]] [[['Some string','Some string'],[]],[['Some string']],[[]]] [[NULL,1,2],[NULL],[1,2],[]] [['Some string',NULL,'Some string'],[NULL],[]]
|
17
tests/queries/0_stateless/01273_arrow_nested_arrays_load.sh
Executable file
17
tests/queries/0_stateless/01273_arrow_nested_arrays_load.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Round-trip test for the Arrow format with nested (multi-level) array columns:
# dump the table to an Arrow file, insert the dump back, then select everything.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "$CUR_DIR"/../shell_config.sh

# Temporary file holding the Arrow dump of the table.
ARROW_DUMP="${CLICKHOUSE_TMP}"/nested_arrays.arrow

# Prepare a fresh table.
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_nested_arrays"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_nested_arrays (arr1 Array(Array(Array(UInt32))), arr2 Array(Array(Array(String))), arr3 Array(Array(Nullable(UInt32))), arr4 Array(Array(Nullable(String)))) engine=Memory()"

# Seed it with one row of nested arrays.
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_nested_arrays VALUES ([[[1,2,3],[1,2,3]],[[1,2,3]],[[],[1,2,3]]],[[['Some string','Some string'],[]],[['Some string']],[[]]],[[NULL,1,2],[NULL],[1,2],[]],[['Some string',NULL,'Some string'],[NULL],[]])"

# Export to Arrow, then load the export back into the same table.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_nested_arrays FORMAT Arrow" > "${ARROW_DUMP}"
${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_nested_arrays FORMAT Arrow" < "${ARROW_DUMP}"

# Print the result and clean up.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_nested_arrays"
${CLICKHOUSE_CLIENT} --query="DROP table arrow_nested_arrays"
|
@ -0,0 +1,6 @@
|
||||
[1,NULL,2] [NULL,'Some string',NULL] [0,NULL,42.42]
|
||||
[NULL] [NULL] [NULL]
|
||||
[] [] []
|
||||
[1,NULL,2] [NULL,'Some string',NULL] [0,NULL,42.42]
|
||||
[NULL] [NULL] [NULL]
|
||||
[] [] []
|
15
tests/queries/0_stateless/01273_arrow_nullable_arrays_load.sh
Executable file
15
tests/queries/0_stateless/01273_arrow_nullable_arrays_load.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
# Round-trip test for the Arrow format with arrays of Nullable elements:
# dump the table to an Arrow file, insert the dump back, then select everything.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "$CUR_DIR"/../shell_config.sh

# Temporary file holding the Arrow dump of the table.
ARROW_DUMP="${CLICKHOUSE_TMP}"/nullable_arrays.arrow

# Prepare a fresh table and seed it with three rows.
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_nullable_arrays"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_nullable_arrays (arr1 Array(Nullable(UInt32)), arr2 Array(Nullable(String)), arr3 Array(Nullable(Float32))) ENGINE=Memory()"
${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_nullable_arrays VALUES ([1,NULL,2],[NULL,'Some string',NULL],[0.00,NULL,42.42]), ([NULL],[NULL],[NULL]), ([],[],[])"

# Export to Arrow, then load the export back into the same table.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_nullable_arrays FORMAT Arrow" > "${ARROW_DUMP}"
${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_nullable_arrays FORMAT Arrow" < "${ARROW_DUMP}"

# Print the result and clean up.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_nullable_arrays"
${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_nullable_arrays"
|
Loading…
Reference in New Issue
Block a user