mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Allow to input and output LowCardinality columns in ORC format
This commit is contained in:
parent
8d1bf1b675
commit
2e9858172e
@ -12,6 +12,7 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
@ -20,6 +21,7 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -48,8 +50,10 @@ void ORCOutputStream::write(const void* buf, size_t length)
|
||||
}
|
||||
|
||||
ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
|
||||
: IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes())
|
||||
: IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_)
|
||||
{
|
||||
for (const auto & type : header_.getDataTypes())
|
||||
data_types.push_back(recursiveRemoveLowCardinality(type));
|
||||
}
|
||||
|
||||
ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & type, const std::string & column_name)
|
||||
@ -482,10 +486,12 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
|
||||
/// The size of the batch must be no less than total amount of array elements.
|
||||
ORC_UNIQUE_PTR<orc::ColumnVectorBatch> batch = writer->createRowBatch(getMaxColumnSize(chunk));
|
||||
orc::StructVectorBatch & root = dynamic_cast<orc::StructVectorBatch &>(*batch);
|
||||
auto columns = chunk.getColumns();
|
||||
for (auto & column : columns)
|
||||
column = recursiveRemoveLowCardinality(column);
|
||||
|
||||
for (size_t i = 0; i != columns_num; ++i)
|
||||
{
|
||||
writeColumn(*root.fields[i], *chunk.getColumns()[i], data_types[i], nullptr);
|
||||
}
|
||||
writeColumn(*root.fields[i], *columns[i], data_types[i], nullptr);
|
||||
root.numElements = rows_num;
|
||||
writer->add(*batch);
|
||||
}
|
||||
@ -505,7 +511,7 @@ void ORCBlockOutputFormat::prepareWriter()
|
||||
options.setCompression(orc::CompressionKind::CompressionKind_NONE);
|
||||
size_t columns_count = header.columns();
|
||||
for (size_t i = 0; i != columns_count; ++i)
|
||||
schema->addStructField(header.safeGetByPosition(i).name, getORCType(data_types[i], header.safeGetByPosition(i).name));
|
||||
schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]), header.safeGetByPosition(i).name));
|
||||
writer = orc::createWriter(*schema, &output_stream, options);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,10 @@
|
||||
0 ['0'] ('0')
|
||||
1 ['1'] ('1')
|
||||
2 ['2'] ('2')
|
||||
3 ['3'] ('3')
|
||||
4 ['4'] ('4')
|
||||
5 ['5'] ('5')
|
||||
6 ['6'] ('6')
|
||||
7 ['7'] ('7')
|
||||
8 ['8'] ('8')
|
||||
9 ['9'] ('9')
|
16
tests/queries/0_stateless/02029_orc_low_cardinality.sh
Executable file
16
tests/queries/0_stateless/02029_orc_low_cardinality.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-unbundled, no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc_lc";
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="CREATE TABLE orc_lc (lc LowCardinality(String), array_lc Array(LowCardinality(String)), tuple_lc Tuple(LowCardinality(String))) ENGINE = Memory()";
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT [lc] as array_lc, tuple(lc) as tuple_lc, toLowCardinality(toString(number)) as lc from numbers(10) FORMAT ORC" | $CLICKHOUSE_CLIENT --query="INSERT INTO orc_lc FORMAT ORC";
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM orc_lc";
|
Loading…
Reference in New Issue
Block a user