#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } /** Bitmap functions. * Build a bitmap from integer array: * bitmapBuild: integer[] -> bitmap * * Convert bitmap to integer array: * bitmapToArray: bitmap -> integer[] * * Return subset in specified range (not include the range_end): * bitmapSubsetInRange: bitmap,integer,integer -> bitmap * * Two bitmap and calculation: * bitmapAnd: bitmap,bitmap -> bitmap * * Two bitmap or calculation: * bitmapOr: bitmap,bitmap -> bitmap * * Two bitmap xor calculation: * bitmapXor: bitmap,bitmap -> bitmap * * Two bitmap andnot calculation: * bitmapAndnot: bitmap,bitmap -> bitmap * * Retrun bitmap cardinality: * bitmapCardinality: bitmap -> integer * * Retrun smallest value in the set: * bitmapMin: bitmap -> integer * * Retrun the greatest value in the set: * bitmapMax: bitmap -> integer * * Two bitmap and calculation, return cardinality: * bitmapAndCardinality: bitmap,bitmap -> integer * * Two bitmap or calculation, return cardinality: * bitmapOrCardinality: bitmap,bitmap -> integer * * Two bitmap xor calculation, return cardinality: * bitmapXorCardinality: bitmap,bitmap -> integer * * Two bitmap andnot calculation, return cardinality: * bitmapAndnotCardinality: bitmap,bitmap -> integer * * Determine if a bitmap contains the given integer: * bitmapContains: bitmap,integer -> bool * * Judge if a bitmap is superset of the another one: * bitmapHasAll: bitmap,bitmap -> bool * * Judge if the intersection of two bitmap is nonempty: * bitmapHasAny: bitmap,bitmap -> bool */ template class FunctionBitmapBuildImpl : public IFunction { public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments[0]->onlyNull()) return arguments[0]; auto array_type = typeid_cast(arguments[0].get()); if (!array_type) throw Exception( "First argument for function " + getName() + " must be an array but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto nested_type = array_type->getNestedType(); DataTypes argument_types = {nested_type}; Array params_row; AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); return std::make_shared(bitmap_function, argument_types, params_row); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /* input_rows_count */) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); auto array_type = typeid_cast(from_type); auto nested_type = array_type->getNestedType(); DataTypes argument_types = {nested_type}; WhichDataType which(nested_type); if (which.isUInt8()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt16()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt32()) executeBitmapData(block, argument_types, arguments, result); else if (which.isUInt64()) executeBitmapData(block, argument_types, arguments, result); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } private: template void executeBitmapData(Block & block, DataTypes & argument_types, const ColumnNumbers & arguments, size_t result) { // input data const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); ColumnPtr mapped = array->getDataPtr(); const ColumnArray::Offsets & offsets = array->getOffsets(); const ColumnVector * column = checkAndGetColumn>(&*mapped); const typename ColumnVector::Container & input_data = column->getData(); // output data Array params_row; AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); auto col_to = ColumnAggregateFunction::create(bitmap_function); col_to->reserve(offsets.size()); size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) { col_to->insertDefault(); AggregateFunctionGroupBitmapData & bitmap_data = *reinterpret_cast *>(col_to->getData()[i]); for (; pos < offsets[i]; ++pos) { bitmap_data.rbs.add(input_data[pos]); } } block.getByPosition(result).column = std::move(col_to); } }; template class FunctionBitmapToArrayImpl : public IFunction { public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { const DataTypeAggregateFunction * bitmap_type = typeid_cast(arguments[0].get()); if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const DataTypePtr data_type = bitmap_type->getArgumentsDataTypes()[0]; return std::make_shared(data_type); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { // input data const auto & return_type = block.getByPosition(result).type; auto res_ptr = return_type->createColumn(); ColumnArray & res = assert_cast(*res_ptr); IColumn & res_data = res.getData(); ColumnArray::Offsets & res_offsets = res.getOffsets(); const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt16()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt32()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else if (which.isUInt64()) executeIntType(block, arguments, input_rows_count, res_data, res_offsets); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(res_ptr); } private: using ToType = UInt64; template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) const { const ColumnAggregateFunction * column = typeid_cast(block.getByPosition(arguments[0]).column.get()); PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); ColumnArray::Offset res_offset = 0; for (size_t i = 0; i < input_rows_count; ++i) { const AggregateFunctionGroupBitmapData & bd1 = *reinterpret_cast *>(column->getData()[i]); UInt64 count = bd1.rbs.rb_to_array(res_data); res_offset += count; res_offsets.emplace_back(res_offset); } } }; class FunctionBitmapSubsetInRange : public IFunction { public: static constexpr auto name = "bitmapSubsetInRange"; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 3; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { const DataTypeAggregateFunction * bitmap_type = typeid_cast(arguments[0].get()); if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto arg_type1 = typeid_cast *>(arguments[1].get()); if (!(arg_type1)) throw Exception( "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto arg_type2 = typeid_cast *>(arguments[1].get()); if (!(arg_type2)) throw Exception( "Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return arguments[0]; } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, result, input_rows_count); else if (which.isUInt16()) executeIntType(block, arguments, result, input_rows_count); else if (which.isUInt32()) executeIntType(block, arguments, result, input_rows_count); else if (which.isUInt64()) executeIntType(block, arguments, result, input_rows_count); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } private: using ToType = UInt64; template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const { const IColumn * columns[3]; bool is_column_const[3]; const ColumnAggregateFunction * colAggFunc; const PaddedPODArray * container0; const PaddedPODArray * container1, * container2; for (size_t i = 0; i < 3; ++i) { columns[i] = block.getByPosition(arguments[i]).column.get(); is_column_const[i] = isColumnConst(*columns[i]); } if (is_column_const[0]) { colAggFunc = typeid_cast(typeid_cast(columns[0])->getDataColumnPtr().get()); } else { colAggFunc = typeid_cast(columns[0]); } container0 = &colAggFunc->getData(); if (is_column_const[1]) container1 = &typeid_cast(typeid_cast(columns[1])->getDataColumnPtr().get())->getData(); else container1 = &typeid_cast(columns[1])->getData(); if (is_column_const[2]) container2 = &typeid_cast(typeid_cast(columns[2])->getDataColumnPtr().get())->getData(); else container2 = &typeid_cast(columns[2])->getData(); auto col_to = ColumnAggregateFunction::create(colAggFunc->getAggregateFunction()); col_to->reserve(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { const AggregateDataPtr dataPtr0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData& bd0 = *reinterpret_cast*>(dataPtr0); const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i]; const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i]; col_to->insertDefault(); AggregateFunctionGroupBitmapData & bd2 = *reinterpret_cast *>(col_to->getData()[i]); bd0.rbs.rb_range(range_start, range_end, bd2.rbs); } block.getByPosition(result).column = std::move(col_to); } }; template class FunctionBitmapSelfCardinalityImpl : public IFunction { public: static constexpr auto name = Impl::name; static FunctionPtr create(const Context &) { return std::make_shared>(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { auto bitmap_type = typeid_cast(arguments[0].get()); if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt16()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt32()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt64()) executeIntType(block, arguments, input_rows_count, vec_to); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(col_to); } private: using ToType = UInt64; template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) { const ColumnAggregateFunction * column = typeid_cast(block.getByPosition(arguments[0]).column.get()); for (size_t i = 0; i < input_rows_count; ++i) { const AggregateFunctionGroupBitmapData & bd = *reinterpret_cast *>(column->getData()[i]); vec_to[i] = Impl::apply(bd); } } }; struct BitmapCardinalityImpl { public: static constexpr auto name = "bitmapCardinality"; template static UInt64 apply(const AggregateFunctionGroupBitmapData & bd) { return bd.rbs.size(); } }; struct BitmapMinImpl { public: static constexpr auto name = "bitmapMin"; template static UInt64 apply(const AggregateFunctionGroupBitmapData & bd) { return bd.rbs.rb_min(); } }; struct BitmapMaxImpl { public: static constexpr auto name = "bitmapMax"; template static UInt64 apply(const AggregateFunctionGroupBitmapData & bd) { return bd.rbs.rb_max(); } }; template struct BitmapAndCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // roaring_bitmap_and_cardinality( rb1, rb2 ); return bd1.rbs.rb_and_cardinality(bd2.rbs); } }; template struct BitmapOrCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // return roaring_bitmap_or_cardinality( rb1, rb2 ); return bd1.rbs.rb_or_cardinality(bd2.rbs); } }; template struct BitmapXorCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // return roaring_bitmap_xor_cardinality( rb1, rb2 ); return bd1.rbs.rb_xor_cardinality(bd2.rbs); } }; template struct BitmapAndnotCardinalityImpl { using ReturnType = UInt64; static UInt64 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { // roaring_bitmap_andnot_cardinality( rb1, rb2 ); return bd1.rbs.rb_andnot_cardinality(bd2.rbs); } }; template struct BitmapHasAllImpl { using ReturnType = UInt8; static UInt8 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { return bd1.rbs.rb_is_subset(bd2.rbs); } }; template struct BitmapHasAnyImpl { using ReturnType = UInt8; static UInt8 apply(const AggregateFunctionGroupBitmapData & bd1, const AggregateFunctionGroupBitmapData & bd2) { return bd1.rbs.rb_intersect(bd2.rbs); } }; class FunctionBitmapContains : public IFunction { public: static constexpr auto name = "bitmapContains"; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 2; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { auto bitmap_type0 = typeid_cast(arguments[0].get()); if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData::name())) throw Exception( "First argument for function " + getName() + " must be an bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto arg_type1 = typeid_cast *>(arguments[1].get()); if (!(arg_type1)) throw Exception( "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); } bool useDefaultImplementationForConstants() const override { return true; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); WhichDataType which(aggr_type->getArgumentsDataTypes()[0]); if (which.isUInt8()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt16()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt32()) executeIntType(block, arguments, input_rows_count, vec_to); else if (which.isUInt64()) executeIntType(block, arguments, input_rows_count, vec_to); else throw Exception( "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(col_to); } private: template void executeIntType( Block & block, const ColumnNumbers & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) { const IColumn * columns[2]; bool is_column_const[2]; const PaddedPODArray * container0; const PaddedPODArray * container1; for (size_t i = 0; i < 2; ++i) { columns[i] = block.getByPosition(arguments[i]).column.get(); is_column_const[i] = isColumnConst(*columns[i]); } if (is_column_const[0]) container0 = &typeid_cast(typeid_cast(columns[0])->getDataColumnPtr().get())->getData(); else container0 = &typeid_cast(columns[0])->getData(); if (is_column_const[1]) container1 = &typeid_cast(typeid_cast(columns[1])->getDataColumnPtr().get())->getData(); else container1 = &typeid_cast(columns[1])->getData(); for (size_t i = 0; i < input_rows_count; ++i) { const AggregateDataPtr dataPtr0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const UInt32 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i]; const AggregateFunctionGroupBitmapData& bd0 = *reinterpret_cast*>(dataPtr0); vec_to[i] = bd0.rbs.rb_contains(data1); } } }; template