#include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } /** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value. */ class FunctionEmptyArrayToSingle : public IFunction { public: static constexpr auto name = "emptyArrayToSingle"; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) throw Exception("Argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return arguments[0]; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; namespace { namespace FunctionEmptyArrayToSingleImpl { bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { if (const ColumnConst * const_array = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get())) { if (const_array->getValue().empty()) { auto nested_type = typeid_cast(*block.getByPosition(arguments[0]).type).getNestedType(); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( input_rows_count, Array{nested_type->getDefault()}); } else block.getByPosition(result).column = block.getByPosition(arguments[0]).column; return true; } else return false; } template bool executeNumber( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const NullMap * src_null_map, NullMap * res_null_map) { if (const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data)) { const PaddedPODArray & src_data = src_data_concrete->getData(); PaddedPODArray & res_data = static_cast &>(res_data_col).getData(); size_t size = src_offsets.size(); res_offsets.resize(size); res_data.reserve(src_data.size()); if (nullable) res_null_map->reserve(src_null_map->size()); ColumnArray::Offset src_prev_offset = 0; ColumnArray::Offset res_prev_offset = 0; for (size_t i = 0; i < size; ++i) { if (src_offsets[i] != src_prev_offset) { size_t size_to_write = src_offsets[i] - src_prev_offset; res_data.resize(res_prev_offset + size_to_write); memcpy(&res_data[res_prev_offset], &src_data[src_prev_offset], size_to_write * sizeof(T)); if (nullable) { res_null_map->resize(res_prev_offset + size_to_write); memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); } res_prev_offset += size_to_write; res_offsets[i] = res_prev_offset; } else { res_data.push_back(T()); ++res_prev_offset; res_offsets[i] = res_prev_offset; if (nullable) res_null_map->push_back(1); /// Push NULL. } src_prev_offset = src_offsets[i]; } return true; } else return false; } template bool executeFixedString( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const NullMap * src_null_map, NullMap * res_null_map) { if (const ColumnFixedString * src_data_concrete = checkAndGetColumn(&src_data)) { const size_t n = src_data_concrete->getN(); const ColumnFixedString::Chars & src_data = src_data_concrete->getChars(); auto concrete_res_data = typeid_cast(&res_data_col); if (!concrete_res_data) throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; ColumnFixedString::Chars & res_data = concrete_res_data->getChars(); size_t size = src_offsets.size(); res_offsets.resize(size); res_data.reserve(src_data.size()); if (nullable) res_null_map->reserve(src_null_map->size()); ColumnArray::Offset src_prev_offset = 0; ColumnArray::Offset res_prev_offset = 0; for (size_t i = 0; i < size; ++i) { if (src_offsets[i] != src_prev_offset) { size_t size_to_write = src_offsets[i] - src_prev_offset; size_t prev_res_data_size = res_data.size(); res_data.resize(prev_res_data_size + size_to_write * n); memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset * n], size_to_write * n); if (nullable) { res_null_map->resize(res_prev_offset + size_to_write); memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); } res_prev_offset += size_to_write; res_offsets[i] = res_prev_offset; } else { size_t prev_res_data_size = res_data.size(); res_data.resize(prev_res_data_size + n); memset(&res_data[prev_res_data_size], 0, n); ++res_prev_offset; res_offsets[i] = res_prev_offset; if (nullable) res_null_map->push_back(1); } src_prev_offset = src_offsets[i]; } return true; } else return false; } template bool executeString( const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, const NullMap * src_null_map, NullMap * res_null_map) { if (const ColumnString * src_data_concrete = checkAndGetColumn(&src_data)) { const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets(); auto concrete_res_string_offsets = typeid_cast(&res_data_col); if (!concrete_res_string_offsets) throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; ColumnString::Offsets & res_string_offsets = concrete_res_string_offsets->getOffsets(); const ColumnString::Chars & src_data = src_data_concrete->getChars(); auto concrete_res_data = typeid_cast(&res_data_col); if (!concrete_res_data) throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; ColumnString::Chars & res_data = concrete_res_data->getChars(); size_t size = src_array_offsets.size(); res_array_offsets.resize(size); res_string_offsets.reserve(src_string_offsets.size()); res_data.reserve(src_data.size()); if (nullable) res_null_map->reserve(src_null_map->size()); ColumnArray::Offset src_array_prev_offset = 0; ColumnArray::Offset res_array_prev_offset = 0; ColumnString::Offset src_string_prev_offset = 0; ColumnString::Offset res_string_prev_offset = 0; for (size_t i = 0; i < size; ++i) { if (src_array_offsets[i] != src_array_prev_offset) { size_t array_size = src_array_offsets[i] - src_array_prev_offset; size_t bytes_to_copy = 0; size_t from_string_prev_offset_local = src_string_prev_offset; for (size_t j = 0; j < array_size; ++j) { size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; res_string_prev_offset += string_size; res_string_offsets.push_back(res_string_prev_offset); from_string_prev_offset_local += string_size; bytes_to_copy += string_size; } size_t res_data_old_size = res_data.size(); res_data.resize(res_data_old_size + bytes_to_copy); memcpy(&res_data[res_data_old_size], &src_data[src_string_prev_offset], bytes_to_copy); if (nullable) { res_null_map->resize(res_array_prev_offset + array_size); memcpy(&(*res_null_map)[res_array_prev_offset], &(*src_null_map)[src_array_prev_offset], array_size); } res_array_prev_offset += array_size; res_array_offsets[i] = res_array_prev_offset; } else { res_data.push_back(0); /// An empty string, including zero at the end. if (nullable) res_null_map->push_back(1); ++res_string_prev_offset; res_string_offsets.push_back(res_string_prev_offset); ++res_array_prev_offset; res_array_offsets[i] = res_array_prev_offset; } src_array_prev_offset = src_array_offsets[i]; if (src_array_prev_offset) src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; } return true; } else return false; } template void executeGeneric( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, ColumnArray::Offsets & res_offsets, const NullMap * src_null_map, NullMap * res_null_map) { size_t size = src_offsets.size(); res_offsets.resize(size); res_data.reserve(src_data.size()); if (nullable) res_null_map->reserve(src_null_map->size()); ColumnArray::Offset src_prev_offset = 0; ColumnArray::Offset res_prev_offset = 0; for (size_t i = 0; i < size; ++i) { if (src_offsets[i] != src_prev_offset) { size_t size_to_write = src_offsets[i] - src_prev_offset; res_data.insertRangeFrom(src_data, src_prev_offset, size_to_write); if (nullable) { res_null_map->resize(res_prev_offset + size_to_write); memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); } res_prev_offset += size_to_write; res_offsets[i] = res_prev_offset; } else { res_data.insertDefault(); ++res_prev_offset; res_offsets[i] = res_prev_offset; if (nullable) res_null_map->push_back(1); } src_prev_offset = src_offsets[i]; } } template void executeDispatch( const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, const NullMap * src_null_map, NullMap * res_null_map) { if (!(executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeNumber(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeString (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) || executeFixedString (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map))) executeGeneric (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map); } } } void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result, input_rows_count)) return; const ColumnArray * array = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); if (!array) throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); MutableColumnPtr res_ptr = array->cloneEmpty(); ColumnArray & res = static_cast(*res_ptr); const IColumn & src_data = array->getData(); const ColumnArray::Offsets & src_offsets = array->getOffsets(); IColumn & res_data = res.getData(); ColumnArray::Offsets & res_offsets = res.getOffsets(); const NullMap * src_null_map = nullptr; NullMap * res_null_map = nullptr; const IColumn * inner_col; IColumn * inner_res_col; bool nullable = src_data.isColumnNullable(); if (nullable) { auto nullable_col = static_cast(&src_data); inner_col = &nullable_col->getNestedColumn(); src_null_map = &nullable_col->getNullMapData(); auto nullable_res_col = static_cast(&res_data); inner_res_col = &nullable_res_col->getNestedColumn(); res_null_map = &nullable_res_col->getNullMapData(); } else { inner_col = &src_data; inner_res_col = &res_data; } if (nullable) FunctionEmptyArrayToSingleImpl::executeDispatch(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); else FunctionEmptyArrayToSingleImpl::executeDispatch(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); block.getByPosition(result).column = std::move(res_ptr); } void registerFunctionEmptyArrayToSingle(FunctionFactory & factory) { factory.registerFunction(); } }