#pragma once #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } /** Bitmap functions. * Build a bitmap from integer array: * bitmapBuild: integer[] -> bitmap * * Convert bitmap to integer array: * bitmapToArray: bitmap -> integer[] * * Two bitmap and calculation: * bitmapAnd: bitmap,bitmap -> bitmap * * Two bitmap or calculation: * bitmapOr: bitmap,bitmap -> bitmap * * Two bitmap xor calculation: * bitmapXor: bitmap,bitmap -> bitmap * * Two bitmap andnot calculation: * bitmapAndnot: bitmap,bitmap -> bitmap * * Retrun bitmap cardinality: * bitmapCardinality: bitmap -> integer * * Two bitmap and calculation, return cardinality: * bitmapAndCardinality: bitmap,bitmap -> integer * * Two bitmap or calculation, return cardinality: * bitmapOrCardinality: bitmap,bitmap -> integer * * Two bitmap xor calculation, return cardinality: * bitmapXorCardinality: bitmap,bitmap -> integer * * Two bitmap andnot calculation, return cardinality: * bitmapAndnotCardinality: bitmap,bitmap -> integer */ template class FunctionBitmapBuildImpl : public IFunction { public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments[0]->onlyNull()) return arguments[0]; auto array_type = typeid_cast(arguments[0].get()); if (!array_type) throw Exception( "First argument for function " + getName() + " must be an array but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto nested_type = array_type->getNestedType(); DataTypes argument_types = {nested_type}; Array params_row; AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); return std::make_shared(bitmap_function, argument_types, params_row); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /* input_rows_count */) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); auto array_type = typeid_cast(from_type); auto nested_type = array_type->getNestedType(); DataTypes argument_types = {nested_type}; WhichDataType which(nested_type); if (which.isUInt8()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt16()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt32()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt64()) executeBitmapData(block, argument_types, arguments, result); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } private: template void executeBitmapData(Block & block, DataTypes & argument_types, const ColumnNumbers & arguments, size_t result) { // input data const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); ColumnPtr mapped = array->getDataPtr(); const ColumnArray::Offsets & offsets = array->getOffsets(); const ColumnVector * column = checkAndGetColumn>(&*mapped); const typename ColumnVector::Container & input_data = column->getData(); // output data Array params_row; AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); auto col_to = ColumnAggregateFunction::create(bitmap_function); col_to->reserve(offsets.size()); size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) { col_to->insertDefault(); AggregateFunctionGroupBitmapData & bitmap_data = *reinterpret_cast *>(col_to->getData()[i]); for (; pos < offsets[i]; ++pos) { bitmap_data.rbs.add(input_data[pos]); } } block.getByPosition(result).column = std::move(col_to); } }; template class FunctionBitmapToArrayImpl : public IFunction { public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { const DataTypeAggregateFunction * bitmap_type = typeid_cast(arguments[0].get()); if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const DataTypePtr data_type = bitmap_type->getArgumentsDataTypes()[0]; return std::make_shared(data_type); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { // input data const auto & return_type = block.getByPosition(result).type; auto res_ptr = return_type->createColumn(); ColumnArray & res = static_cast(*res_ptr); IColumn & res_data = res.getData(); ColumnArray::Offsets & res_offsets = res.getOffsets(); const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt16()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt32()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt64()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(res_ptr); } private: using ToType = UInt64; template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) const { const ColumnAggregateFunction * column = typeid_cast(block.getByPosition(arguments[0]).column.get()); PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); ColumnArray::Offset res_offset = 0; for (size_t i = 0; i < input_rows_count; ++i) { const AggregateFunctionGroupBitmapData & bd1 = *reinterpret_cast *>(column->getData()[i]); UInt64 count = bd1.rbs.rb_to_array(res_data); res_offset += count; res_offsets.emplace_back(res_offset); } } }; template class FunctionBitmapSelfCardinalityImpl : public IFunction { public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { auto bitmap_type = typeid_cast(arguments[0].get()); if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt16()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt32()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt64()) executeIntType(block, arguments, input_rows_count, vec_to); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(col_to); } private: using ToType = UInt64; template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) { const ColumnAggregateFunction * column = typeid_cast(block.getByPosition(arguments[0]).column.get()); for (size_t i = 0; i < input_rows_count; ++i) { const AggregateFunctionGroupBitmapData & bd1 = *reinterpret_cast *>(column->getData()[i]); vec_to[i] = bd1.rbs.size(); } } }; template struct BitmapAndCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // roaring_bitmap_and_cardinality( rb1, rb2 ); return bd1.rbs.rb_and_cardinality(bd2.rbs); } }; template struct BitmapOrCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // return roaring_bitmap_or_cardinality( rb1, rb2 ); return bd1.rbs.rb_or_cardinality(bd2.rbs); } }; template struct BitmapXorCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // return roaring_bitmap_xor_cardinality( rb1, rb2 ); return bd1.rbs.rb_xor_cardinality(bd2.rbs); } }; template struct BitmapAndnotCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // roaring_bitmap_andnot_cardinality( rb1, rb2 ); return bd1.rbs.rb_andnot_cardinality(bd2.rbs); } }; template struct BitmapHasAllImpl { using ReturnType = UInt8; static UInt8 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { return bd1.rbs.rb_is_subset(bd2.rbs); } }; template struct BitmapHasAnyImpl { using ReturnType = UInt8; static UInt8 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { return bd1.rbs.rb_intersect(bd2.rbs); } }; template