ClickHouse/dbms/Functions/array/arrayCompact.cpp

161 lines
5.6 KiB
C++
Raw Normal View History

2019-10-15 07:43:05 +00:00
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
2019-10-15 07:43:05 +00:00
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
2019-11-04 22:13:10 +00:00
#include <Functions/array/FunctionArrayMapped.h>
2019-10-15 07:43:05 +00:00
#include <Functions/FunctionFactory.h>
2019-11-04 22:13:10 +00:00
2019-10-15 07:43:05 +00:00
namespace DB
{
2019-11-04 22:13:10 +00:00
/// arrayCompact(['a', 'a', 'b', 'b', 'a']) = ['a', 'b', 'a'] - collapses each run of consecutive equal elements into one element.
/// Empty here: this block declares no error codes.
namespace ErrorCodes
{
}
struct ArrayCompactImpl
{
static bool useDefaultImplementationForConstants() { return true; }
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }
static DataTypePtr getReturnType(const DataTypePtr & nested_type, const DataTypePtr &)
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
return std::make_shared<DataTypeArray>(nested_type);
2019-10-15 07:43:05 +00:00
}
2019-11-04 22:13:10 +00:00
template <typename T>
static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
2019-10-15 07:43:05 +00:00
{
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
const ColVecType * src_values_column = checkAndGetColumn<ColVecType>(mapped.get());
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
if (!src_values_column)
return false;
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
const IColumn::Offsets & src_offsets = array.getOffsets();
const typename ColVecType::Container & src_values = src_values_column->getData();
typename ColVecType::MutablePtr res_values_column;
if constexpr (IsDecimalNumber<T>)
res_values_column = ColVecType::create(src_values.size(), src_values.getScale());
else
res_values_column = ColVecType::create(src_values.size());
2019-10-15 07:43:05 +00:00
typename ColVecType::Container & res_values = res_values_column->getData();
2019-11-04 22:13:10 +00:00
size_t src_offsets_size = src_offsets.size();
auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size);
IColumn::Offsets & res_offsets = res_offsets_column->getData();
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
size_t res_pos = 0;
size_t src_pos = 0;
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
for (size_t i = 0; i < src_offsets_size; ++i)
{
auto src_offset = src_offsets[i];
/// If array is not empty.
if (src_pos < src_offset)
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
/// Insert first element unconditionally.
res_values[res_pos] = src_values[src_pos];
/// For the rest of elements, insert if the element is different from the previous.
++src_pos;
++res_pos;
for (; src_pos < src_offset; ++src_pos)
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
if (src_values[src_pos] != src_values[src_pos - 1])
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
res_values[res_pos] = src_values[src_pos];
++res_pos;
2019-10-15 07:43:05 +00:00
}
}
}
2019-11-04 22:13:10 +00:00
res_offsets[i] = res_pos;
2019-10-15 07:43:05 +00:00
}
2019-11-04 22:13:10 +00:00
res_values.resize(res_pos);
res_ptr = ColumnArray::create(std::move(res_values_column), std::move(res_offsets_column));
return true;
}
static void executeGeneric(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
{
const IColumn::Offsets & src_offsets = array.getOffsets();
auto res_values_column = mapped->cloneEmpty();
res_values_column->reserve(mapped->size());
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
size_t src_offsets_size = src_offsets.size();
auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size);
IColumn::Offsets & res_offsets = res_offsets_column->getData();
size_t res_pos = 0;
size_t src_pos = 0;
for (size_t i = 0; i < src_offsets_size; ++i)
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
auto src_offset = src_offsets[i];
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
/// If array is not empty.
if (src_pos < src_offset)
{
/// Insert first element unconditionally.
res_values_column->insertFrom(*mapped, src_pos);
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
/// For the rest of elements, insert if the element is different from the previous.
++src_pos;
++res_pos;
for (; src_pos < src_offset; ++src_pos)
{
if (mapped->compareAt(src_pos - 1, src_pos, *mapped, 1))
{
res_values_column->insertFrom(*mapped, src_pos);
++res_pos;
}
}
}
res_offsets[i] = res_pos;
}
2019-10-15 07:43:05 +00:00
2019-11-04 22:13:10 +00:00
res_ptr = ColumnArray::create(std::move(res_values_column), std::move(res_offsets_column));
}
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
2019-10-15 07:43:05 +00:00
{
2019-11-04 22:13:10 +00:00
ColumnPtr res;
if (!(executeType< UInt8 >(mapped, array, res) ||
executeType< UInt16>(mapped, array, res) ||
executeType< UInt32>(mapped, array, res) ||
executeType< UInt64>(mapped, array, res) ||
executeType< Int8 >(mapped, array, res) ||
executeType< Int16 >(mapped, array, res) ||
executeType< Int32 >(mapped, array, res) ||
executeType< Int64 >(mapped, array, res) ||
executeType<Float32>(mapped, array, res) ||
executeType<Float64>(mapped, array, res)) ||
executeType<Decimal32>(mapped, array, res) ||
executeType<Decimal64>(mapped, array, res) ||
executeType<Decimal128>(mapped, array, res))
2019-11-04 22:13:10 +00:00
{
executeGeneric(mapped, array, res);
}
return res;
2019-10-15 07:43:05 +00:00
}
2019-11-04 22:13:10 +00:00
};
/// Name tag passed to FunctionArrayMapped: carries the function name as a compile-time string.
struct NameArrayCompact
{
    static constexpr auto name = "arrayCompact";
};
/// The arrayCompact function type: FunctionArrayMapped instantiated with ArrayCompactImpl and the NameArrayCompact tag.
using FunctionArrayCompact = FunctionArrayMapped<ArrayCompactImpl, NameArrayCompact>;
/// Registers FunctionArrayCompact in the given function factory.
void registerFunctionArrayCompact(FunctionFactory & factory)
{
factory.registerFunction<FunctionArrayCompact>();
}
2019-10-15 07:43:05 +00:00
2019-10-28 04:51:59 +00:00
}
2019-10-28 06:48:25 +00:00