basic version

This commit is contained in:
Lirikl 2023-05-11 13:14:52 +03:00
parent 39a505e3f3
commit 213ac1eba4

View File

@ -108,6 +108,10 @@ public:
else
{
/// The first condition is optimization: do not compare data if the pointers are equal.
if(column_array->lowCardinality() || array_type->getNestedType())
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "low cadinality array"); // test
}
if (column_array->getOffsetsPtr() != offsets_column
&& column_array->getOffsets() != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName());
@ -121,73 +125,71 @@ public:
recursiveRemoveLowCardinality(array_type->getNestedType()),
array_with_type_and_name.name));
}
ssize_t row_count = input_rows_count;
ssize_t rows_count = input_rows_count;
ssize_t data_row_count = arrays[0].column->size();
MutableColumnPtr current_column;
current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
current_column->insertMany((*arguments.back().column)[0], row_count);
if (rows_count == 0) {
return current_column;
}
current_column->insertMany((*arguments.back().column)[0], rows_count);
MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
IColumn::Permutation inverse_perm;
inverse_perm.reserve(row_count);
IColumn::Permutation inverse_perm(rows_count);
size_t inverse_perm_count = 0;
auto array_count = arrays.size();
size_t max_array_size = 0;
auto& offsets = column_first_array->getOffsets();
for(ssize_t i = 0; i < row_count; i++) {
if (offsets[i] - offsets[i-1] > max_array_size)
max_array_size = offsets[i] - offsets[i-1];
}
IColumn::Selector selector(data_row_count);
size_t cur_ind = 0;
ssize_t cur_arr = 0;
for (ssize_t i = 0; i < data_row_count; i++)
if (data_row_count) {
while (offsets[cur_arr] == 0) {
++cur_arr;
}
}
for (ssize_t i = 0; i < data_row_count; ++i)
{
selector[i] = cur_ind++;
while (cur_arr < row_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr-1])
if (cur_ind > max_array_size)
max_array_size = cur_ind;
while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1])
{
++cur_arr;
cur_ind = 0;
}
}
//if (max_array_size != 2)
// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} ", max_array_size);
std::vector<MutableColumns> data_arrays;
data_arrays.reserve(array_count);
std::vector<MutableColumns> data_arrays;
data_arrays.resize(array_count);
for (size_t i = 0; i < array_count; ++i)
{
data_arrays.push_back(arrays[i].column->scatter(max_array_size, selector));
data_arrays[i] = arrays[i].column->scatter(max_array_size, selector);
}
size_t prev_size = row_count;
size_t prev_size = rows_count;
for (size_t ind = 0; ind < max_array_size; ++ind) {
IColumn::Selector prev_selector(prev_size);
ssize_t prev_ind = 0;
for (ssize_t irow = 0; irow < row_count; ++irow)
size_t prev_ind = 0;
for (ssize_t irow = 0; irow < rows_count; ++irow)
{
if (offsets[irow] - offsets[irow-1] > ind)
if (offsets[irow] - offsets[irow - 1] > ind)
{
prev_selector[prev_ind++] = 1;
}
else if (offsets[irow] - offsets[irow-1] == ind)
else if (offsets[irow] - offsets[irow - 1] == ind)
{
inverse_perm[inverse_perm_count++] = irow;
prev_selector[prev_ind++] = 0;
}
}
if (row_count > 0 && max_array_size > 1 && ind == 0) {
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} {} {} ", prev_size, current_column->size() , arrays[0].type->getName());
}
//remove rows that doesn't have any more elements
auto prev = current_column->scatter(2, prev_selector);
@ -197,25 +199,24 @@ public:
auto * res_lambda_ptr = typeid_cast<ColumnFunction *>(res_lambda.get());
for (size_t i = 0; i < array_count; i++)
res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move((data_arrays[i][ind])), arrays[i].type, arrays[i].name)}));
res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)}));
res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)}));
current_column = IColumn::mutate(res_lambda_ptr->reduce().column);
prev_size = current_column->size();
}
result_data->insertRangeFrom(*current_column, 0, current_column->size());
for (ssize_t irow = 0; irow < row_count; ++irow)
for (ssize_t irow = 0; irow < rows_count; ++irow)
{
if (offsets[irow] - offsets[irow-1] == max_array_size)
inverse_perm[inverse_perm_count++] = irow;
}
IColumn::Permutation perm(row_count);
for (ssize_t i = 0; i < row_count; i++) {
IColumn::Permutation perm(rows_count);
for (ssize_t i = 0; i < rows_count; i++) {
perm[inverse_perm[i]] = i;
}