mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 21:12:28 +00:00
draft
This commit is contained in:
parent
e1025300d0
commit
03e0e9a14b
@ -296,6 +296,40 @@ void ParquetPlainValuesReader<ColumnString>::readBatch(
|
||||
);
|
||||
}
|
||||
|
||||
template <>
|
||||
void ParquetBitPlainReader<ColumnUInt8>::readBatch(
|
||||
MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values)
|
||||
{
|
||||
auto & column = *assert_cast<ColumnUInt8 *>(col_ptr.get());
|
||||
auto cursor = column.size();
|
||||
|
||||
auto & container = column.getData();
|
||||
|
||||
container.resize(cursor + num_values);
|
||||
|
||||
def_level_reader->visitNullableValues(
|
||||
cursor,
|
||||
num_values,
|
||||
max_def_level,
|
||||
null_map,
|
||||
/* individual_visitor */ [&](size_t nest_cursor)
|
||||
{
|
||||
uint8_t byte;
|
||||
bit_reader->GetValue(1, &byte);
|
||||
container[nest_cursor] = byte;
|
||||
},
|
||||
/* repeated_visitor */ [&](size_t nest_cursor, UInt32 count)
|
||||
{
|
||||
for (UInt32 i = 0; i < count; i++)
|
||||
{
|
||||
uint8_t byte;
|
||||
bit_reader->GetValue(1, &byte);
|
||||
container[nest_cursor++] = byte;
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
void ParquetPlainValuesReader<ColumnDecimal<DateTime64>, ParquetReaderTypes::TimestampInt96>::readBatch(
|
||||
@ -515,6 +549,13 @@ void ParquetRleDictReader<ColumnString>::readBatch(
|
||||
);
|
||||
}
|
||||
|
||||
template <>
|
||||
void ParquetRleDictReader<ColumnUInt8>::readBatch(
|
||||
MutableColumnPtr & , LazyNullMap &, UInt32)
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
|
||||
template <typename TColumnVector>
|
||||
void ParquetRleDictReader<TColumnVector>::readBatch(
|
||||
MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values)
|
||||
@ -561,6 +602,7 @@ template class ParquetPlainValuesReader<ColumnDecimal<Decimal32>>;
|
||||
template class ParquetPlainValuesReader<ColumnDecimal<Decimal64>>;
|
||||
template class ParquetPlainValuesReader<ColumnDecimal<DateTime64>>;
|
||||
template class ParquetPlainValuesReader<ColumnString>;
|
||||
template class ParquetPlainValuesReader<ColumnUInt8>;
|
||||
|
||||
template class ParquetFixedLenPlainReader<ColumnDecimal<Decimal128>>;
|
||||
template class ParquetFixedLenPlainReader<ColumnDecimal<Decimal256>>;
|
||||
@ -569,6 +611,7 @@ template class ParquetRleLCReader<ColumnUInt8>;
|
||||
template class ParquetRleLCReader<ColumnUInt16>;
|
||||
template class ParquetRleLCReader<ColumnUInt32>;
|
||||
|
||||
template class ParquetRleDictReader<ColumnUInt8>;
|
||||
template class ParquetRleDictReader<ColumnInt32>;
|
||||
template class ParquetRleDictReader<ColumnUInt32>;
|
||||
template class ParquetRleDictReader<ColumnInt64>;
|
||||
|
@ -172,6 +172,27 @@ private:
|
||||
ParquetDataBuffer plain_data_buffer;
|
||||
};
|
||||
|
||||
template <typename TColumn>
|
||||
class ParquetBitPlainReader : public ParquetDataValuesReader
|
||||
{
|
||||
public:
|
||||
ParquetBitPlainReader(
|
||||
Int32 max_def_level_,
|
||||
std::unique_ptr<RleValuesReader> def_level_reader_,
|
||||
std::unique_ptr<arrow::bit_util::BitReader> bit_reader_)
|
||||
: max_def_level(max_def_level_)
|
||||
, def_level_reader(std::move(def_level_reader_))
|
||||
, bit_reader(std::move(bit_reader_))
|
||||
{}
|
||||
|
||||
void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override;
|
||||
|
||||
private:
|
||||
Int32 max_def_level;
|
||||
std::unique_ptr<RleValuesReader> def_level_reader;
|
||||
std::unique_ptr<arrow::bit_util::BitReader> bit_reader;
|
||||
};
|
||||
|
||||
/**
|
||||
* The data and definition level encoding are same as ParquetPlainValuesReader.
|
||||
* But the element size is const and bigger than primitive data type.
|
||||
|
@ -463,6 +463,28 @@ void ParquetLeafColReader<TColumn>::initDataReader(
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void ParquetLeafColReader<ColumnUInt8>::initDataReader(
|
||||
parquet::Encoding::type enconding_type,
|
||||
const uint8_t * buffer,
|
||||
std::size_t max_size,
|
||||
std::unique_ptr<RleValuesReader> && def_level_reader)
|
||||
{
|
||||
switch (enconding_type)
|
||||
{
|
||||
case parquet::Encoding::PLAIN:
|
||||
{
|
||||
auto bit_reader = std::make_unique<arrow::bit_util::BitReader>(buffer, max_size);
|
||||
data_values_reader = std::make_unique<ParquetBitPlainReader<ColumnUInt8>>(col_descriptor.max_definition_level(),
|
||||
std::move(def_level_reader),
|
||||
std::move(bit_reader));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unknown encoding type: {}", enconding_type);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumn>
|
||||
void ParquetLeafColReader<TColumn>::readPageV1(const parquet::DataPageV1 & page)
|
||||
{
|
||||
@ -620,6 +642,7 @@ std::unique_ptr<ParquetDataValuesReader> ParquetLeafColReader<TColumn>::createDi
|
||||
}
|
||||
|
||||
|
||||
template class ParquetLeafColReader<ColumnUInt8>;
|
||||
template class ParquetLeafColReader<ColumnInt32>;
|
||||
template class ParquetLeafColReader<ColumnUInt32>;
|
||||
template class ParquetLeafColReader<ColumnInt64>;
|
||||
|
@ -263,7 +263,7 @@ std::unique_ptr<ParquetColumnReader> ColReaderFactory::makeReader()
|
||||
switch (col_descriptor.physical_type())
|
||||
{
|
||||
case parquet::Type::BOOLEAN:
|
||||
break;
|
||||
return makeLeafReader<DataTypeUInt8>();
|
||||
case parquet::Type::INT32:
|
||||
return fromInt32();
|
||||
case parquet::Type::INT64:
|
||||
|
Loading…
Reference in New Issue
Block a user