#include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; } namespace { /** in(x, set) - function for evaluating the IN * notIn(x, set) - and NOT IN. */ template struct FunctionInName; template <> struct FunctionInName { static constexpr auto name = "in"; }; template <> struct FunctionInName { static constexpr auto name = "globalIn"; }; template <> struct FunctionInName { static constexpr auto name = "notIn"; }; template <> struct FunctionInName { static constexpr auto name = "globalNotIn"; }; template <> struct FunctionInName { static constexpr auto name = "nullIn"; }; template <> struct FunctionInName { static constexpr auto name = "globalNullIn"; }; template <> struct FunctionInName { static constexpr auto name = "notNullIn"; }; template <> struct FunctionInName { static constexpr auto name = "globalNotNullIn"; }; template <> struct FunctionInName { static constexpr auto name = "inIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "globalInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "notInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "globalNotInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "nullInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "globalNullInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "notNullInIgnoreSet"; }; template <> struct FunctionInName { static constexpr auto name = "globalNotNullInIgnoreSet"; }; template class FunctionIn : public IFunction { public: /// ignore_set flag means that we don't use set from the second argument, just return zero column. /// It is needed to perform type analysis without creation of set. static constexpr auto name = FunctionInName::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } /// Do not use default implementation for LowCardinality. /// For now, Set may be const or non const column, depending on how it was created. /// But we will return UInt8 for any case. /// TODO: we could use special implementation later. bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } bool useDefaultImplementationForConstants() const override { /// Never return constant for -IgnoreSet functions to avoid constant folding. return !ignore_set; } bool useDefaultImplementationForNulls() const override { return null_is_skipped; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, [[maybe_unused]] size_t input_rows_count) const override { if constexpr (ignore_set) return ColumnUInt8::create(input_rows_count, 0u); if (input_rows_count == 0) return ColumnUInt8::create(); /// Second argument must be ColumnSet. ColumnPtr column_set_ptr = arguments[1].column; const ColumnSet * column_set = checkAndGetColumnConstData(column_set_ptr.get()); if (!column_set) column_set = checkAndGetColumn(column_set_ptr.get()); if (!column_set) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function '{}' must be Set; found {}", getName(), column_set_ptr->getName()); ColumnsWithTypeAndName columns_of_key_columns; /// First argument may be a tuple or a single column. const ColumnWithTypeAndName & left_arg = arguments[0]; const ColumnTuple * tuple = typeid_cast(left_arg.column.get()); const ColumnConst * const_tuple = checkAndGetColumnConst(left_arg.column.get()); const DataTypeTuple * type_tuple = typeid_cast(left_arg.type.get()); ColumnPtr materialized_tuple; if (const_tuple) { materialized_tuple = const_tuple->convertToFullColumn(); tuple = typeid_cast(materialized_tuple.get()); } auto set = column_set->getData(); if (!set) throw Exception(ErrorCodes::LOGICAL_ERROR, "Not-ready Set passed as the second argument for function '{}'", getName()); auto set_types = set->getDataTypes(); if (tuple && set_types.size() != 1 && set_types.size() == tuple->tupleSize()) { const auto & tuple_columns = tuple->getColumns(); const DataTypes & tuple_types = type_tuple->getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) columns_of_key_columns.emplace_back(tuple_columns[i], tuple_types[i], "_" + toString(i)); } else columns_of_key_columns.emplace_back(left_arg); /// Replace single LowCardinality column to it's dictionary if possible. ColumnPtr lc_indexes = nullptr; bool is_const = false; if (columns_of_key_columns.size() == 1) { auto & arg = columns_of_key_columns.at(0); const auto * col = arg.column.get(); if (const auto * const_col = typeid_cast(col)) { col = &const_col->getDataColumn(); is_const = true; } if (const auto * lc = typeid_cast(col)) { lc_indexes = lc->getIndexesPtr(); arg.column = lc->getDictionary().getNestedColumn(); arg.type = removeLowCardinality(arg.type); } } auto res = set->execute(columns_of_key_columns, negative); if (lc_indexes) res = res->index(*lc_indexes, 0); if (is_const) res = ColumnUInt8::create(input_rows_count, res->getUInt(0)); if (res->size() != input_rows_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Output size is different from input size, expect {}, get {}", input_rows_count, res->size()); return res; } }; template void registerFunctionsInImpl(FunctionFactory & factory) { factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); } } REGISTER_FUNCTION(In) { registerFunctionsInImpl(factory); registerFunctionsInImpl(factory); } }