#include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } /** Generates array * range(size): [0, size) * range(start, end): [start, end) * range(start, end, step): [start, end) with step increments. */ class FunctionRange : public IFunction { public: static constexpr auto name = "range"; const size_t max_elements; static FunctionPtr create(ContextPtr context_) { return std::make_shared(std::move(context_)); } explicit FunctionRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) {} private: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() > 3 || arguments.empty()) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs 1..3 arguments; passed {}.", getName(), arguments.size()); } for (const auto & arg : arguments) { if (!isInteger(arg)) throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } DataTypePtr common_type = getLeastSupertype(arguments); return std::make_shared(common_type); } template ColumnPtr executeInternal(const IColumn * arg) const { if (const auto in = checkAndGetColumn>(arg)) { const auto & in_data = in->getData(); const auto total_values = std::accumulate(std::begin(in_data), std::end(in_data), size_t{}, [this] (const size_t lhs, const T rhs) { if (rhs < 0) throw Exception{"A call to function " + getName() + " overflows, only support positive values when only end is provided", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; const auto sum = lhs + rhs; if (sum < lhs) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; return sum; }); if (total_values > max_elements) throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(in->size()); auto & out_data = data_col->getData(); auto & out_offsets = offsets_col->getData(); IColumn::Offset offset{}; for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx) { for (T elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx) out_data[offset + elem_idx] = static_cast(elem_idx); offset += in_data[row_idx]; out_offsets[row_idx] = offset; } return ColumnArray::create(std::move(data_col), std::move(offsets_col)); } else return nullptr; } template ColumnPtr executeConstStartStep( const IColumn * end_arg, const T start, const T step, const size_t input_rows_count) const { auto end_column = checkAndGetColumn>(end_arg); if (!end_column) return nullptr; const auto & end_data = end_column->getData(); size_t total_values = 0; size_t pre_values = 0; std::vector row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; if (start < end_data[row_idx] && step > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step) + 1; else if (start > end_data[row_idx] && step < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step) + 1; pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; total_values = pre_values; if (total_values > max_elements) throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; } auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); auto & out_data = data_col->getData(); auto & out_offsets = offsets_col->getData(); IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { for (size_t idx = 0; idx < row_length[row_idx]; idx++) out_data[offset++] = static_cast(start + offset * step); out_offsets[row_idx] = offset; } return ColumnArray::create(std::move(data_col), std::move(offsets_col)); } template ColumnPtr executeConstStep( const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count) const { auto start_column = checkAndGetColumn>(start_arg); auto end_column = checkAndGetColumn>(end_arg); if (!end_column || !start_column) return nullptr; const auto & start_data = start_column->getData(); const auto & end_data = end_column->getData(); size_t total_values = 0; size_t pre_values = 0; std::vector row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; if (start_data[row_idx] < end_data[row_idx] && step > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step) + 1; else if (start_data[row_idx] > end_data[row_idx] && step < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step) + 1; pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; total_values = pre_values; if (total_values > max_elements) throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; } auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); auto & out_data = data_col->getData(); auto & out_offsets = offsets_col->getData(); IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { for (size_t idx = 0; idx < row_length[row_idx]; idx++) out_data[offset++] = static_cast(start_data[row_idx] + idx * step); out_offsets[row_idx] = offset; } return ColumnArray::create(std::move(data_col), std::move(offsets_col)); } template ColumnPtr executeConstStart( const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count) const { auto end_column = checkAndGetColumn>(end_arg); auto step_column = checkAndGetColumn>(step_arg); if (!end_column || !step_column) return nullptr; const auto & end_data = end_column->getData(); const auto & step_data = step_column->getData(); size_t total_values = 0; size_t pre_values = 0; std::vector row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step_data[row_idx] == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; if (start < end_data[row_idx] && step_data[row_idx] > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; else if (start > end_data[row_idx] && step_data[row_idx] < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; total_values = pre_values; if (total_values > max_elements) throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; } auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); auto & out_data = data_col->getData(); auto & out_offsets = offsets_col->getData(); IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { for (size_t idx = 0; idx < row_length[row_idx]; idx++) out_data[offset++] = static_cast(start + offset * step_data[row_idx]); out_offsets[row_idx] = offset; } return ColumnArray::create(std::move(data_col), std::move(offsets_col)); } template ColumnPtr executeGeneric( const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count) const { auto start_column = checkAndGetColumn>(start_col); auto end_column = checkAndGetColumn>(end_col); auto step_column = checkAndGetColumn>(step_col); if (!start_column || !end_column || !step_column) return nullptr; const auto & start_data = start_column->getData(); const auto & end_start = end_column->getData(); const auto & step_data = step_column->getData(); size_t total_values = 0; size_t pre_values = 0; std::vector row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step_data[row_idx] == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't less or equal to zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] > 0) row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; else if (start_data[row_idx] > end_start[row_idx] && step_data[row_idx] < 0) row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; total_values = pre_values; if (total_values > max_elements) throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; } auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); auto & out_data = data_col->getData(); auto & out_offsets = offsets_col->getData(); IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { for (size_t idx = 0; idx < row_length[row_idx]; idx++) out_data[offset++] = static_cast(start_data[row_idx] + idx * step_data[row_idx]); out_offsets[row_idx] = offset; } return ColumnArray::create(std::move(data_col), std::move(offsets_col)); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); if (!which.isNativeUInt() && !which.isNativeInt()) { throw Exception{"Illegal columns of arguments of function " + getName() + ", the function only implemented for unsigned/signed integers up to 64 bit", ErrorCodes::ILLEGAL_COLUMN}; } ColumnPtr res; if (arguments.size() == 1) { const auto * col = arguments[0].column.get(); if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)))) { throw Exception{"Illegal column " + col->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } return res; } Columns columns_holder(3); ColumnRawPtrs column_ptrs(3); for (size_t i = 0; i < arguments.size(); ++i) { if (i == 1) columns_holder[i] = castColumn(arguments[i], elem_type)->convertToFullColumnIfConst(); else columns_holder[i] = castColumn(arguments[i], elem_type); column_ptrs[i] = columns_holder[i].get(); } /// Step is one by default. if (arguments.size() == 2) { /// Convert a column with constant 1 to the result type. columns_holder[2] = castColumn( {DataTypeUInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); column_ptrs[2] = columns_holder[2].get(); } bool is_start_const = isColumnConst(*column_ptrs[0]); bool is_step_const = isColumnConst(*column_ptrs[2]); if (is_start_const && is_step_const) { if (which.isNativeUInt()) { UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || (res = executeConstStartStep( column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) { } } else if (which.isNativeInt()) { Int64 start = assert_cast(*column_ptrs[0]).getInt(0); Int64 step = assert_cast(*column_ptrs[2]).getInt(0); if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || (res = executeConstStartStep( column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) { } } } else if (is_start_const && !is_step_const) { if (which.isNativeUInt()) { UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) { } } else if (which.isNativeInt()) { Int64 start = assert_cast(*column_ptrs[0]).getInt(0); if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) { } } } else if (!is_start_const && is_step_const) { if (which.isNativeUInt()) { UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) { } } else if (which.isNativeInt()) { Int64 step = assert_cast(*column_ptrs[2]).getInt(0); if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) { } } } else { if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) { } } if (!res) { throw Exception{"Illegal columns " + column_ptrs[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } return res; } }; REGISTER_FUNCTION(Range) { factory.registerFunction(); } }