Implemented ColumnArray::replicate for Nullable, Tuple and for generic case [#CLICKHOUSE-4].

This commit is contained in:
Alexey Milovidov 2017-04-18 06:03:39 +03:00
parent b51c6e7592
commit 22e8b8029b
10 changed files with 349 additions and 148 deletions

View File

@ -3,6 +3,8 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <Common/Exception.h>
@ -385,6 +387,8 @@ ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) con
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint);
return filterGeneric(filt, result_size_hint);
}
@ -516,6 +520,56 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
return res;
}
/// Filter specialization for arrays whose elements are Nullable.
/// The nested values are wrapped into a temporary array and filtered through the regular filter()
/// (which also yields the result offsets); the null map is filtered with the same filter by
/// filterArraysImplOnlyData, which does not build offsets a second time.
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint) const
{
    /// No rows at all: return a new array over the same data.
    if (getOffsets().size() == 0)
        return std::make_shared<ColumnArray>(data);

    const ColumnNullable & nullable_elems = static_cast<const ColumnNullable &>(*data);

    /// Temporary array of the nested (non-Nullable) values that shares our offsets.
    auto nested_values_array = std::make_shared<ColumnArray>(nullable_elems.getNestedColumn(), offsets);
    auto filtered_holder = nested_values_array->filter(filt, result_size_hint);
    auto & filtered_nested = static_cast<ColumnArray &>(*filtered_holder);

    /// The result is assembled first; its null map is filled right below.
    auto filtered_null_map = std::make_shared<ColumnUInt8>();
    auto result = std::make_shared<ColumnArray>(
        std::make_shared<ColumnNullable>(filtered_nested.getDataPtr(), filtered_null_map),
        filtered_nested.getOffsetsColumn());

    filterArraysImplOnlyData(
        nullable_elems.getNullMap(), getOffsets(),
        filtered_null_map->getData(),
        filt, result_size_hint);

    return result;
}
/// Filter specialization for arrays whose elements are tuples.
/// Every tuple component is wrapped into a temporary array sharing our offsets and filtered on its own;
/// the filtered components are then collected back into a single tuple.
/// Throws LOGICAL_ERROR if the tuple has no components.
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint) const
{
    /// No rows at all: return a new array over the same data.
    if (getOffsets().size() == 0)
        return std::make_shared<ColumnArray>(data);

    const ColumnTuple & tuple = static_cast<const ColumnTuple &>(*data);

    const size_t num_components = tuple.getColumns().size();
    if (num_components == 0)
        throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR);

    /// Filter a temporary array for each component of the tuple.
    Columns filtered_components(num_components);
    for (size_t pos = 0; pos < num_components; ++pos)
    {
        ColumnArray component_array(tuple.getColumns()[pos], getOffsetsColumn());
        filtered_components[pos] = component_array.filter(filt, result_size_hint);
    }

    /// Put the filtered data of each component into a block with the same structure as the source tuple.
    Block result_block = tuple.getData().cloneEmpty();
    for (size_t pos = 0; pos < num_components; ++pos)
    {
        auto & filtered_array = static_cast<ColumnArray &>(*filtered_components[pos]);
        result_block.getByPosition(pos).column = filtered_array.getDataPtr();
    }

    /// The offsets are taken from the first filtered component.
    auto & first_filtered = static_cast<ColumnArray &>(*filtered_components.front());
    return std::make_shared<ColumnArray>(
        std::make_shared<ColumnTuple>(result_block),
        first_filtered.getOffsetsColumn());
}
ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
{
@ -584,8 +638,6 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
ColumnPtr ColumnArray::replicate(const Offsets_t & replicate_offsets) const
{
/// It does not work out in general case.
if (typeid_cast<const ColumnUInt8 *>(data.get())) return replicateNumber<UInt8>(replicate_offsets);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return replicateNumber<UInt16>(replicate_offsets);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return replicateNumber<UInt32>(replicate_offsets);
@ -598,8 +650,9 @@ ColumnPtr ColumnArray::replicate(const Offsets_t & replicate_offsets) const
if (typeid_cast<const ColumnFloat64 *>(data.get())) return replicateNumber<Float64>(replicate_offsets);
if (typeid_cast<const ColumnString *>(data.get())) return replicateString(replicate_offsets);
if (dynamic_cast<const IColumnConst *>(data.get())) return replicateConst(replicate_offsets);
throw Exception("Replication of column " + getName() + " is not implemented.", ErrorCodes::NOT_IMPLEMENTED);
if (typeid_cast<const ColumnNullable *>(data.get())) return replicateNullable(replicate_offsets);
if (typeid_cast<const ColumnTuple *>(data.get())) return replicateTuple(replicate_offsets);
return replicateGeneric(replicate_offsets);
}
@ -765,4 +818,74 @@ ColumnPtr ColumnArray::replicateConst(const Offsets_t & replicate_offsets) const
}
/** Generic (slow) implementation of replicate, used when no specialized variant matches the element type.
  * Row i of this column is appended to the result (replicate_offsets[i] - replicate_offsets[i - 1]) times
  * (replicate_offsets[-1] is taken as 0), one insertFrom call per copy.
  *
  * Throws SIZES_OF_COLUMNS_DOESNT_MATCH if replicate_offsets has a different number of entries than this column has rows.
  * Returns an empty clone of this column when there are no rows.
  */
ColumnPtr ColumnArray::replicateGeneric(const Offsets_t & replicate_offsets) const
{
    size_t col_size = size();
    if (col_size != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    ColumnPtr res = cloneEmpty();
    ColumnArray & res_concrete = static_cast<ColumnArray &>(*res);

    if (0 == col_size)
        return res;

    IColumn::Offset_t prev_offset = 0;
    for (size_t i = 0; i < col_size; ++i)
    {
        /// The number of copies is dictated by replicate_offsets, not by the array's own offsets:
        /// the latter describe how many elements each row holds, which is unrelated to how many
        /// times the row has to be repeated.
        size_t size_to_replicate = replicate_offsets[i] - prev_offset;
        prev_offset = replicate_offsets[i];

        for (size_t j = 0; j < size_to_replicate; ++j)
            res_concrete.insertFrom(*this, i);
    }

    return res;
}
/// Replicate specialization for arrays whose elements are Nullable.
/// The nested values and the null map are each wrapped into a temporary array sharing our offsets,
/// replicated independently, and then combined back into an array of Nullable.
/// NOTE The offsets are calculated twice, which is redundant.
ColumnPtr ColumnArray::replicateNullable(const Offsets_t & replicate_offsets) const
{
    const ColumnNullable & nullable = static_cast<const ColumnNullable &>(*data);

    ColumnArray nested_values_array(nullable.getNestedColumn(), getOffsetsColumn());
    auto replicated_values_holder = nested_values_array.replicate(replicate_offsets);

    ColumnArray null_map_array(nullable.getNullMapColumn(), getOffsetsColumn());
    auto replicated_null_map_holder = null_map_array.replicate(replicate_offsets);

    auto & replicated_values = static_cast<ColumnArray &>(*replicated_values_holder);
    auto & replicated_null_map = static_cast<ColumnArray &>(*replicated_null_map_holder);

    /// The result offsets are taken from the replicated array of nested values.
    return std::make_shared<ColumnArray>(
        std::make_shared<ColumnNullable>(
            replicated_values.getDataPtr(),
            replicated_null_map.getDataPtr()),
        replicated_values.getOffsetsColumn());
}
/// Replicate specialization for arrays whose elements are tuples.
/// Every tuple component is wrapped into a temporary array sharing our offsets and replicated on its own
/// (the same approach as for Nullable); the results are then collected back into a single tuple.
/// Throws LOGICAL_ERROR if the tuple has no components.
ColumnPtr ColumnArray::replicateTuple(const Offsets_t & replicate_offsets) const
{
    const ColumnTuple & tuple = static_cast<const ColumnTuple &>(*data);

    const size_t num_components = tuple.getColumns().size();
    if (num_components == 0)
        throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR);

    /// Replicate a temporary array for each component of the tuple.
    Columns replicated_components(num_components);
    for (size_t pos = 0; pos < num_components; ++pos)
    {
        ColumnArray component_array(tuple.getColumns()[pos], getOffsetsColumn());
        replicated_components[pos] = component_array.replicate(replicate_offsets);
    }

    /// Put the replicated data of each component into a block with the same structure as the source tuple.
    Block result_block = tuple.getData().cloneEmpty();
    for (size_t pos = 0; pos < num_components; ++pos)
    {
        auto & replicated_array = static_cast<ColumnArray &>(*replicated_components[pos]);
        result_block.getByPosition(pos).column = replicated_array.getDataPtr();
    }

    /// The offsets are taken from the first replicated component.
    auto & first_replicated = static_cast<ColumnArray &>(*replicated_components.front());
    return std::make_shared<ColumnArray>(
        std::make_shared<ColumnTuple>(result_block),
        first_replicated.getOffsetsColumn());
}
}

View File

@ -102,12 +102,20 @@ private:
*/
ColumnPtr replicateConst(const Offsets_t & replicate_offsets) const;
/** The following are implemented by simply replicating the nested columns.
*/
ColumnPtr replicateTuple(const Offsets_t & replicate_offsets) const;
ColumnPtr replicateNullable(const Offsets_t & replicate_offsets) const;
ColumnPtr replicateGeneric(const Offsets_t & replicate_offsets) const;
/// Specializations for the filter function.
template <typename T>
ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
};

View File

@ -303,7 +303,7 @@ void ColumnVector<T>::getExtremes(Field & min, Field & max) const
}
/// Explicit template instantinations - to avoid code bloat in headers.
/// Explicit template instantiations - to avoid code bloat in headers.
template class ColumnVector<UInt8>;
template class ColumnVector<UInt16>;
template class ColumnVector<UInt32>;

View File

@ -53,19 +53,90 @@ namespace ErrorCodes
}
template <typename T>
void filterArraysImpl(
namespace
{
/// Implementation details of the filterArraysImpl function, used as a template parameter.
/// Allows the offsets array either to be built or to be skipped.
/// Offsets policy that really writes the result offsets.
/// Keeps a running total of the element counts passed to it; that total is the value pushed by insertOne
/// and the base against which chunks copied by insertChunk are rebased.
struct ResultOffsetsBuilder
{
    IColumn::Offsets_t & res_offsets;
    IColumn::Offset_t current_src_offset = 0;

    ResultOffsetsBuilder(IColumn::Offsets_t * res_offsets_) : res_offsets(*res_offsets_) {}

    /// Reserve room for the offsets: the hint if it is positive, otherwise the source size.
    void reserve(size_t result_size_hint, size_t src_size)
    {
        const size_t rows_to_reserve = result_size_hint > 0 ? result_size_hint : src_size;
        res_offsets.reserve(rows_to_reserve);
    }

    /// Append the offset of a single array that holds array_size elements.
    void insertOne(size_t array_size)
    {
        current_src_offset += array_size;
        res_offsets.push_back(current_src_offset);
    }

    /// Append offsets for SIMD_BYTES consecutive arrays at once.
    /// The source offsets are copied verbatim and then, unless this is the first chunk of the source,
    /// shifted down by the difference between the source position of the chunk and the running total.
    template <size_t SIMD_BYTES>
    void insertChunk(
        const IColumn::Offset_t * src_offsets_pos,
        bool first,
        IColumn::Offset_t chunk_offset,
        size_t chunk_size)
    {
        const auto old_size = res_offsets.size();
        res_offsets.resize(old_size + SIMD_BYTES);

        const auto copied_offsets = &res_offsets[old_size];
        memcpy(copied_offsets, src_offsets_pos, SIMD_BYTES * sizeof(IColumn::Offset_t));

        if (!first)
        {
            /// How far the copied source offsets are ahead of the running total.
            const auto shift = chunk_offset - current_src_offset;

            if (shift > 0)
            {
                for (size_t k = 0; k < SIMD_BYTES; ++k)
                    copied_offsets[k] -= shift;
            }
        }

        current_src_offset += chunk_size;
    }
};
/// Offsets policy that builds nothing: it exposes the same member functions as ResultOffsetsBuilder,
/// but every one of them is a no-op and all arguments are ignored.
struct NoResultOffsetsBuilder
{
    NoResultOffsetsBuilder(IColumn::Offsets_t * /*res_offsets_*/) {}

    void reserve(size_t /*result_size_hint*/, size_t /*src_size*/)
    {
    }

    void insertOne(size_t /*array_size*/)
    {
    }

    template <size_t SIMD_BYTES>
    void insertChunk(
        const IColumn::Offset_t * /*src_offsets_pos*/,
        bool /*first*/,
        IColumn::Offset_t /*chunk_offset*/,
        size_t /*chunk_size*/)
    {
    }
};
template <typename T, typename ResultOffsetsBuilder>
void filterArraysImplGeneric(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets_t & res_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets_t * res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint)
{
const size_t size = src_offsets.size();
if (size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
ResultOffsetsBuilder result_offsets_builder(res_offsets);
if (result_size_hint)
{
res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
result_offsets_builder.reserve(result_size_hint, size);
if (result_size_hint < 0)
res_elems.reserve(src_elems.size());
@ -73,8 +144,6 @@ void filterArraysImpl(
res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size);
}
IColumn::Offset_t current_src_offset = 0;
const UInt8 * filt_pos = &filt[0];
const auto filt_end = filt_pos + size;
@ -87,8 +156,7 @@ void filterArraysImpl(
const auto offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1];
const auto size = *offset_ptr - offset;
current_src_offset += size;
res_offsets.push_back(current_src_offset);
result_offsets_builder.insertOne(size);
const auto elems_size_old = res_elems.size();
res_elems.resize(elems_size_old + size);
@ -118,25 +186,7 @@ void filterArraysImpl(
const auto chunk_offset = first ? 0 : offsets_pos[-1];
const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;
const auto offsets_size_old = res_offsets.size();
res_offsets.resize(offsets_size_old + SIMD_BYTES);
memcpy(&res_offsets[offsets_size_old], offsets_pos, SIMD_BYTES * sizeof(IColumn::Offset_t));
if (!first)
{
/// difference between current and actual offset
const auto diff_offset = chunk_offset - current_src_offset;
if (diff_offset > 0)
{
const auto res_offsets_pos = &res_offsets[offsets_size_old];
/// adjust offsets
for (size_t i = 0; i < SIMD_BYTES; ++i)
res_offsets_pos[i] -= diff_offset;
}
}
current_src_offset += chunk_size;
result_offsets_builder.template insertChunk<SIMD_BYTES>(offsets_pos, first, chunk_offset, chunk_size);
/// copy elements for SIMD_BYTES arrays at once
const auto elems_size_old = res_elems.size();
@ -164,48 +214,50 @@ void filterArraysImpl(
++offsets_pos;
}
}
}
/// Filters the elements of arrays and writes both the resulting elements and the resulting offsets.
/// Forwards to filterArraysImplGeneric with the offsets-building policy.
template <typename T>
void filterArraysImpl(
    const PaddedPODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
    PaddedPODArray<T> & res_elems, IColumn::Offsets_t & res_offsets,
    const IColumn::Filter & filt, ssize_t result_size_hint)
{
    filterArraysImplGeneric<T, ResultOffsetsBuilder>(
        src_elems, src_offsets,
        res_elems, &res_offsets,
        filt, result_size_hint);
}
/// Filters the elements of arrays and writes only the resulting elements; no offsets are produced.
/// Forwards to filterArraysImplGeneric with the no-op offsets policy and a null offsets pointer.
template <typename T>
void filterArraysImplOnlyData(
    const PaddedPODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
    PaddedPODArray<T> & res_elems,
    const IColumn::Filter & filt, ssize_t result_size_hint)
{
    filterArraysImplGeneric<T, NoResultOffsetsBuilder>(
        src_elems, src_offsets,
        res_elems, nullptr,
        filt, result_size_hint);
}
/// Explicit instantiations - not to place the implementation of the function above in the header file.
template void filterArraysImpl<UInt8>(
const PaddedPODArray<UInt8> &, const IColumn::Offsets_t &,
PaddedPODArray<UInt8> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<UInt16>(
const PaddedPODArray<UInt16> &, const IColumn::Offsets_t &,
PaddedPODArray<UInt16> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<UInt32>(
const PaddedPODArray<UInt32> &, const IColumn::Offsets_t &,
PaddedPODArray<UInt32> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<UInt64>(
const PaddedPODArray<UInt64> &, const IColumn::Offsets_t &,
PaddedPODArray<UInt64> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Int8>(
const PaddedPODArray<Int8> &, const IColumn::Offsets_t &,
PaddedPODArray<Int8> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Int16>(
const PaddedPODArray<Int16> &, const IColumn::Offsets_t &,
PaddedPODArray<Int16> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Int32>(
const PaddedPODArray<Int32> &, const IColumn::Offsets_t &,
PaddedPODArray<Int32> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Int64>(
const PaddedPODArray<Int64> &, const IColumn::Offsets_t &,
PaddedPODArray<Int64> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Float32>(
const PaddedPODArray<Float32> &, const IColumn::Offsets_t &,
PaddedPODArray<Float32> &, IColumn::Offsets_t &,
const IColumn::Filter &, ssize_t);
template void filterArraysImpl<Float64>(
const PaddedPODArray<Float64> &, const IColumn::Offsets_t &,
PaddedPODArray<Float64> &, IColumn::Offsets_t &,
/// INSTANTIATE(TYPE) emits explicit instantiations of both filterArraysImpl<TYPE> and
/// filterArraysImplOnlyData<TYPE>. It is applied to each element type listed below
/// and undefined immediately afterwards.
#define INSTANTIATE(TYPE) \
template void filterArraysImpl<TYPE>( \
const PaddedPODArray<TYPE> &, const IColumn::Offsets_t &, \
PaddedPODArray<TYPE> &, IColumn::Offsets_t &, \
const IColumn::Filter &, ssize_t); \
template void filterArraysImplOnlyData<TYPE>( \
const PaddedPODArray<TYPE> &, const IColumn::Offsets_t &, \
PaddedPODArray<TYPE> &, \
const IColumn::Filter &, ssize_t);
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)
INSTANTIATE(UInt32)
INSTANTIATE(UInt64)
INSTANTIATE(Int8)
INSTANTIATE(Int16)
INSTANTIATE(Int32)
INSTANTIATE(Int64)
INSTANTIATE(Float32)
INSTANTIATE(Float64)
#undef INSTANTIATE
}

View File

@ -19,4 +19,11 @@ void filterArraysImpl(
PaddedPODArray<T> & res_elems, IColumn::Offsets_t & res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint);
/// Same as above, but does not fill res_offsets.
template <typename T>
void filterArraysImplOnlyData(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
PaddedPODArray<T> & res_elems,
const IColumn::Filter & filt, ssize_t result_size_hint);
}

View File

@ -155,6 +155,6 @@ void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new
}
/// Explicit template instantinations.
/// Explicit template instantiations.
template class Allocator<true>;
template class Allocator<false>;

View File

@ -196,7 +196,7 @@ ColumnPtr DataTypeNumberBase<T>::createConstColumn(size_t size, const Field & fi
}
/// Explicit template instantinations - to avoid code bloat in headers.
/// Explicit template instantiations - to avoid code bloat in headers.
template class DataTypeNumberBase<UInt8>;
template class DataTypeNumberBase<UInt16>;
template class DataTypeNumberBase<UInt32>;

View File

@ -53,7 +53,7 @@ IColumn::Selector createBlockSelector(
}
/// Explicit instantinations to avoid code bloat in headers.
/// Explicit instantiations to avoid code bloat in headers.
template IColumn::Selector createBlockSelector<UInt8>(const IColumn & column, size_t num_shards, const std::vector<size_t> & slots);
template IColumn::Selector createBlockSelector<UInt16>(const IColumn & column, size_t num_shards, const std::vector<size_t> & slots);
template IColumn::Selector createBlockSelector<UInt32>(const IColumn & column, size_t num_shards, const std::vector<size_t> & slots);

View File

@ -0,0 +1,8 @@
[1,NULL,2] 1
[1,NULL,2] \N
[1,NULL,2] 2
[(1,2),(3,4),(5,6)] (1,2)
[(1,2),(3,4),(5,6)] (3,4)
[(1,2),(3,4),(5,6)] (5,6)
['Hello','world'] Hello
['Hello','world'] world

View File

@ -0,0 +1,3 @@
-- Each query selects a materialized array column next to arrayJoin of the same array.
-- NOTE(review): presumably the array column is repeated per joined element via ColumnArray::replicate - confirm.

-- Array containing a NULL element.
SELECT x, arrayJoin(x) FROM (SELECT materialize([1, NULL, 2]) AS x);
-- Array of tuples.
SELECT x, arrayJoin(x) FROM (SELECT materialize([(1, 2), (3, 4), (5, 6)]) AS x);
-- Array of FixedString(5) values; presumably exercises the generic replicate path - confirm.
SELECT x, arrayJoin(x) FROM (SELECT materialize(arrayMap(x -> toFixedString(x, 5), ['Hello', 'world'])) AS x);