ClickHouse/dbms/Functions/array/arraySplit.cpp

122 lines
3.8 KiB
C++
Raw Normal View History

2019-10-13 08:06:57 +00:00
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
2020-02-25 18:10:48 +00:00
/// Error codes used in this file. The constant is only declared here (extern);
/// its definition lives outside this translation unit.
namespace ErrorCodes
{
/// Thrown by ArraySplitImpl::execute when the cut column has an unsupported type.
extern const int ILLEGAL_COLUMN;
}
2019-10-13 08:06:57 +00:00
2019-10-25 03:25:02 +00:00
template <bool reverse>
2019-10-13 08:06:57 +00:00
struct ArraySplitImpl
{
static bool needBoolean() { return true; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
{
return std::make_shared<DataTypeArray>(
std::make_shared<DataTypeArray>(array_element)
);
}
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
const ColumnUInt8 * column_cut = typeid_cast<const ColumnUInt8 *>(&*mapped);
const IColumn::Offsets & in_offsets = array.getOffsets();
auto column_offsets_2 = ColumnArray::ColumnOffsets::create();
auto column_offsets_1 = ColumnArray::ColumnOffsets::create();
IColumn::Offsets & out_offsets_2 = column_offsets_2->getData();
IColumn::Offsets & out_offsets_1 = column_offsets_1->getData();
if (column_cut)
{
const IColumn::Filter & cut = column_cut->getData();
size_t pos = 0;
2019-11-13 02:41:23 +00:00
out_offsets_2.reserve(in_offsets.size()); // assume the actual size to be equal or larger
2019-10-13 08:06:57 +00:00
out_offsets_1.reserve(in_offsets.size());
2020-03-09 03:38:43 +00:00
for (auto in_offset : in_offsets)
2019-10-13 08:06:57 +00:00
{
2020-03-09 03:38:43 +00:00
if (pos < in_offset)
2019-10-13 08:06:57 +00:00
{
2019-11-13 02:41:23 +00:00
pos += !reverse;
2020-03-09 03:38:43 +00:00
for (; pos < in_offset - reverse; ++pos)
2019-11-13 02:41:23 +00:00
if (cut[pos])
out_offsets_2.push_back(pos + reverse);
pos += reverse;
out_offsets_2.push_back(pos);
2019-10-13 08:06:57 +00:00
}
out_offsets_1.push_back(out_offsets_2.size());
}
}
else
{
auto column_cut_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
if (!column_cut_const)
throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN);
if (column_cut_const->getValue<UInt8>())
{
out_offsets_2.reserve(in_offsets.back());
out_offsets_1.reserve(in_offsets.size());
for (size_t i = 0; i < in_offsets.back(); ++i)
out_offsets_2.push_back(i + 1);
2020-03-09 03:38:43 +00:00
for (auto in_offset : in_offsets)
out_offsets_1.push_back(in_offset);
2019-10-13 08:06:57 +00:00
}
else
{
2019-11-13 02:41:23 +00:00
size_t pos = 0;
2019-10-13 08:06:57 +00:00
out_offsets_2.reserve(in_offsets.size());
out_offsets_1.reserve(in_offsets.size());
2020-03-09 03:38:43 +00:00
for (auto in_offset : in_offsets)
2019-10-13 08:06:57 +00:00
{
2020-03-09 03:38:43 +00:00
if (pos < in_offset)
2019-11-13 02:41:23 +00:00
{
2020-03-09 03:38:43 +00:00
pos = in_offset;
2019-11-13 02:41:23 +00:00
out_offsets_2.push_back(pos);
}
out_offsets_1.push_back(out_offsets_2.size());
2019-10-13 08:06:57 +00:00
}
}
}
return ColumnArray::create(
ColumnArray::create(
array.getDataPtr(),
std::move(column_offsets_2)
),
std::move(column_offsets_1)
);
}
};
/// Name tag for the forward variant; carries the function name as a compile-time constant.
struct NameArraySplit
{
    static constexpr auto name = "arraySplit";
};

/// Name tag for the reverse variant; carries the function name as a compile-time constant.
struct NameArrayReverseSplit
{
    static constexpr auto name = "arrayReverseSplit";
};
/// Forward split: ArraySplitImpl instantiated with reverse = false, tagged with NameArraySplit.
using FunctionArraySplit = FunctionArrayMapped<ArraySplitImpl<false>, NameArraySplit>;
/// Reverse split: ArraySplitImpl instantiated with reverse = true, tagged with NameArrayReverseSplit.
using FunctionArrayReverseSplit = FunctionArrayMapped<ArraySplitImpl<true>, NameArrayReverseSplit>;
2019-10-25 03:25:02 +00:00
/// Registers both split variants (FunctionArraySplit and FunctionArrayReverseSplit)
/// in the supplied function factory.
void registerFunctionsArraySplit(FunctionFactory & factory)
{
    factory.registerFunction<FunctionArraySplit>();
    factory.registerFunction<FunctionArrayReverseSplit>();
}
}