/// NOTE(review): this text appears to have gone through a lossy extraction: the header names after
/// `#include`, and most template parameter / argument lists (`<...>`), are absent, and the original
/// line breaks were collapsed. The code below is only re-flowed and commented; the missing text is
/// deliberately not guessed and has to be restored from version control.
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace DB
{

/// Registers the logical functions in `factory`: four registrations, the last one with
/// FunctionFactory::CaseInsensitive.
/// NOTE(review): the function types given to registerFunction are missing from this text —
/// presumably the AND / OR / XOR / NOT function classes; confirm.
void registerFunctionsLogical(FunctionFactory & factory)
{
    factory.registerFunction();
    factory.registerFunction();
    factory.registerFunction();
    factory.registerFunction(FunctionFactory::CaseInsensitive); /// Operator NOT(x) can be parsed as a function.
}

/// Error codes referenced by the exceptions thrown in this file (defined elsewhere).
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
    extern const int ILLEGAL_COLUMN;
}

namespace
{
using namespace FunctionsLogicalDetail;

using UInt8Container = ColumnUInt8::Container;
/// NOTE(review): element type of the vector is missing from this text; later code stores raw
/// pointers to UInt8 columns in it and calls ->getData() on the elements.
using UInt8ColumnPtrs = std::vector;


/// Turns a container of ternary values into a result column.
/// The value column gets 1 in every row whose ternary value equals Ternary::True and 0 otherwise.
/// When `make_nullable` is set, a second UInt8 column is built with 1 in every row whose ternary
/// value equals Ternary::Null, and both are wrapped into a ColumnNullable; otherwise the plain
/// value column is returned.
MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const bool make_nullable)
{
    const size_t rows_count = ternary_data.size();

    auto new_column = ColumnUInt8::create(rows_count);
    std::transform(
            ternary_data.cbegin(), ternary_data.cend(), new_column->getData().begin(),
            [](const auto x) { return x == Ternary::True; });

    if (!make_nullable)
        return new_column;

    auto null_column = ColumnUInt8::create(rows_count);
    std::transform(
            ternary_data.cbegin(), ternary_data.cend(), null_column->getData().begin(),
            [](const auto x) { return x == Ternary::Null; });

    return ColumnNullable::create(std::move(new_column), std::move(null_column));
}

/// Tries to treat `column` as one concrete column type (via checkAndGetColumn) and, on success,
/// writes `!!x` for each of its elements into `res`, starting at res.begin().
/// Returns false without touching `res` when the column is not of that type.
/// `res` is not resized here (std::transform writes through res.begin()), so the caller must have
/// sized it to at least the number of rows in `column`.
/// NOTE(review): the template parameter list is missing from this text — presumably the element
/// type of the column being probed; confirm.
template
bool tryConvertColumnToBool(const IColumn * column, UInt8Container & res)
{
    const auto col = checkAndGetColumn>(column);
    if (!col)
        return false;

    std::transform(
            col->getData().cbegin(), col->getData().cend(), res.begin(),
            [](const auto x) { return !!x; });

    return true;
}

/// Converts `column` to 0/1 values in `res` by trying nine tryConvertColumnToBool instantiations
/// in order and stopping at the first that succeeds (short-circuit `&&`).
/// Throws ILLEGAL_COLUMN when none of them matches.
/// NOTE(review): the template arguments of the nine attempts are missing from this text.
void convertAnyColumnToBool(const IColumn * column, UInt8Container & res)
{
    if (!tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res) &&
        !tryConvertColumnToBool(column, res))
        throw Exception("Unexpected type of column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN);
}


/// Removes every only-NULL or constant column from `in` and folds their values into `res`.
/// Each removed column is converted to a UInt8 by `func` (called with Null() for an only-NULL
/// column, and with the column's element 0 for a constant column); the first converted value is
/// assigned to `res`, every following one is combined as Op::apply(res, x).
/// Returns true if at least one column was removed; `res` is left untouched otherwise.
/// `in` is modified in place.
template
static bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
{
    bool has_res = false;

    /// Walk backwards so that erasing element i does not shift the elements still to be visited;
    /// the index is signed so that `i >= 0` terminates the loop.
    for (int i = static_cast(in.size()) - 1; i >= 0; --i)
    {
        UInt8 x;

        if (in[i]->onlyNull())
            x = func(Null());
        else if (isColumnConst(*in[i]))
            x = func((*in[i])[0]);
        else
            continue;

        if (has_res)
        {
            res = Op::apply(res, x);
        }
        else
        {
            res = x;
            has_res = true;
        }

        in.erase(in.begin() + i);
    }

    return has_res;
}

/// extractConstColumns with a boolean conversion: a NULL field converts to false, any other field
/// is converted through FieldVisitorConvertToNumber.
template
inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res)
{
    return extractConstColumns(
        in, res,
        [](const Field & value)
        {
            return !value.isNull() && applyVisitor(FieldVisitorConvertToNumber(), value);
        }
    );
}

/// extractConstColumns with a ternary conversion: a NULL field becomes
/// Ternary::makeValue(false, true), any other field becomes Ternary::makeValue of its value
/// converted through FieldVisitorConvertToNumber.
template
inline bool extractConstColumnsAsTernary(ColumnRawPtrs & in, UInt8 & res_3v)
{
    return extractConstColumns(
        in, res_3v,
        [](const Field & value)
        {
            return value.isNull()
                    ? Ternary::makeValue(false, true)
                    : Ternary::makeValue(applyVisitor(FieldVisitorConvertToNumber(), value));
        }
    );
}


/// N.B. This class calculates result only for non-nullable types
/// It keeps a reference to the data of one column (`in[in.size() - N]`) plus a nested applier
/// built from the same `in`, so the columns must outlive the applier.
/// NOTE(review): the template parameter list is missing from this text; the body uses `Op` and `N`.
template
class AssociativeApplierImpl
{
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last N columns from `in`.
    explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in)
        : vec(in[in.size() - N]->getData()), next(in) {}

    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
    /// For a saturable Op, the remaining columns are not consulted once this column's value is
    /// already a saturated value.
    inline ResultValueType apply(const size_t i) const
    {
        const auto a = !!vec[i];
        if constexpr (Op::isSaturable())
            return Op::isSaturatedValue(a) ? a : Op::apply(a, next.apply(i));
        else
            return Op::apply(a, next.apply(i));
    }

private:
    const UInt8Container & vec;
    const AssociativeApplierImpl next;
};

/// Terminal case: holds only the last column of `in` and returns its i-th value as a boolean.
/// NOTE(review): the specialization's argument list is missing from this text.
template
class AssociativeApplierImpl
{
    using ResultValueType = typename Op::ResultType;

public:
    explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in)
        : vec(in[in.size() - 1]->getData()) {}

    inline ResultValueType apply(const size_t i) const { return !!vec[i]; }

private:
    const UInt8Container & vec;
};


/// A helper class used by AssociativeGenericApplierImpl
/// Allows for on-the-fly conversion of any data type into intermediate ternary representation
/// NOTE(review): the std::function signature is missing from this text; the getters built below
/// take a row index (size_t) and return a ternary value.
using TernaryValueGetter = std::function;

/// Builds a TernaryValueGetter for a column by probing column types one after another.
/// NOTE(review): the template parameter lists of the declaration and of the recursive case are
/// missing from this text — presumably a list of candidate element types; confirm.
template
struct ValueGetterBuilderImpl;

template
struct ValueGetterBuilderImpl
{
    /// Returns a getter for `x`:
    ///  - an only-NULL column yields Ternary::Null for every row;
    ///  - a column matching the first typeid_cast whose nested column matches the second one yields
    ///    Ternary::makeValue(value, null flag) read from the nested data and the null map;
    ///  - a column matching the plain typed cast yields Ternary::makeValue(value);
    ///  - anything else is delegated to another ValueGetterBuilderImpl::build.
    /// The returned lambdas capture references to the column's data, so `x` must outlive the getter.
    static TernaryValueGetter build(const IColumn * x)
    {
        if (x->onlyNull())
        {
            return [](size_t){ return Ternary::Null; };
        }
        else if (const auto * nullable_column = typeid_cast(x))
        {
            if (const auto * nested_column = typeid_cast *>(nullable_column->getNestedColumnPtr().get()))
            {
                return [
                    &null_data = nullable_column->getNullMapData(),
                    &column_data = nested_column->getData()](size_t i)
                { return Ternary::makeValue(column_data[i], null_data[i]); };
            }
            else
                return ValueGetterBuilderImpl::build(x);
        }
        else if (const auto column = typeid_cast *>(x))
            return [&column_data = column->getData()](size_t i) { return Ternary::makeValue(column_data[i]); };
        else
            return ValueGetterBuilderImpl::build(x);
    }
};

/// End of the probing chain: no candidate matched, so the column type is reported as a
/// LOGICAL_ERROR exception carrying the demangled dynamic type name.
template <>
struct ValueGetterBuilderImpl<>
{
    static TernaryValueGetter build(const IColumn * x)
    {
        throw Exception(
                std::string("Unknown numeric column of type: ") + demangle(typeid(*x).name()),
                ErrorCodes::LOGICAL_ERROR);
    }
};

/// NOTE(review): the argument list of this alias is missing from this text.
using ValueGetterBuilder = ValueGetterBuilderImpl;

/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type
/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation
/// and eliminates the need to materialize data columns in intermediate representation
/// Applier over raw column pointers: each level owns a TernaryValueGetter built by
/// ValueGetterBuilder::build for one column (`in[in.size() - N]`) plus a nested applier built from
/// the same `in`.
/// NOTE(review): template parameter / argument lists (`<...>`) are missing from this text (it looks
/// like an extraction artifact); the body uses `Op` and `N`. Restore them from version control.
template
class AssociativeGenericApplierImpl
{
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last N columns from `in`.
    explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in)
        : val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {}

    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
    /// For a saturable Op, the remaining columns are not consulted once this column's ternary value
    /// is already saturated (Op::isSaturatedValueTernary).
    inline ResultValueType apply(const size_t i) const
    {
        const auto a = val_getter(i);
        if constexpr (Op::isSaturable())
            return Op::isSaturatedValueTernary(a) ? a : Op::apply(a, next.apply(i));
        else
            return Op::apply(a, next.apply(i));
    }

private:
    const TernaryValueGetter val_getter;
    const AssociativeGenericApplierImpl next;
};


/// Terminal case: holds a getter for the last column of `in` only and returns its i-th value.
/// NOTE(review): the specialization's argument list is missing from this text.
template
class AssociativeGenericApplierImpl
{
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last column from `in`.
    explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in)
        : val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {}

    inline ResultValueType apply(const size_t i) const { return val_getter(i); }

private:
    const TernaryValueGetter val_getter;
};


/// Apply target function by feeding it "batches" of N columns
/// Combining 8 columns per pass is the fastest method, because it's the maximum when clang vectorizes a loop.
/// Folds the columns in `in` into `result_data`, consuming `in` from the back in batches.
/// NOTE(review): throughout this chunk the template parameter / argument lists (`<...>`) are
/// missing (it looks like an extraction artifact). The code is only re-flowed and commented here;
/// the missing text is not guessed and must be restored from version control.
template <
    typename Op, template typename OperationApplierImpl, size_t N = 8>
struct OperationApplier
{
    /// Processes all columns of `in`; `in` is empty when this returns, because every batch erases
    /// the columns it consumed.
    /// When `use_result_data_as_input` is false, one extra batch runs before the loop.
    /// NOTE(review): the template arguments of the two doBatchedApply calls are missing —
    /// presumably the first call overwrites `result_data` and the looped call carries it; confirm.
    template
    static void apply(Columns & in, ResultData & result_data, bool use_result_data_as_input = false)
    {
        if (!use_result_data_as_input)
            doBatchedApply(in, result_data.data(), result_data.size());
        while (!in.empty())
            doBatchedApply(in, result_data.data(), result_data.size());
    }

    /// Applies one batch built from the last N columns of `in`, then erases those N columns.
    /// With fewer than N columns left, the call is forwarded to another OperationApplier
    /// instantiation (arguments missing from this text — presumably a smaller batch size).
    /// With CarryResult the previous result_data[i] is combined with the batch value through
    /// Op::apply; otherwise it is overwritten.
    /// `result_data` is declared __restrict, i.e. it must not alias the column data being read.
    template
    static void NO_INLINE doBatchedApply(Columns & in, Result * __restrict result_data, size_t size)
    {
        if (N > in.size())
        {
            OperationApplier
                ::template doBatchedApply(in, result_data, size);
            return;
        }

        const OperationApplierImpl operation_applier_impl(in);
        for (size_t i = 0; i < size; ++i)
        {
            if constexpr (CarryResult)
                result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i));
            else
                result_data[i] = operation_applier_impl.apply(i);
        }

        in.erase(in.end() - N, in.end());
    }
};

/// Specialization whose doBatchedApply always throws LOGICAL_ERROR ("not enough arguments").
/// NOTE(review): the specialization's argument list is missing from this text — presumably the
/// end of the batch-size recursion; confirm.
template <
    typename Op, template typename OperationApplierImpl>
struct OperationApplier
{
    template
    static void NO_INLINE doBatchedApply(Columns &, Result &, size_t)
    {
        throw Exception(
                "OperationApplier<...>::apply(...): not enough arguments to run this method",
                ErrorCodes::LOGICAL_ERROR);
    }
};


/// Evaluates the operation in the intermediate ternary representation.
/// `arguments` is taken by value because constant columns are removed from it and the applier
/// consumes the rest. The returned column is built by buildColumnFromTernaryData and is nullable
/// iff `result_type` is nullable.
template
static ColumnPtr executeForTernaryLogicImpl(ColumnRawPtrs arguments, const DataTypePtr & result_type, size_t input_rows_count)
{
    /// Combine all constant columns into a single constant value.
    UInt8 const_3v_value = 0;
    const bool has_consts = extractConstColumnsAsTernary(arguments, const_3v_value);

    /// If the constant value uniquely determines the result, return it.
    if (has_consts && (arguments.empty() || Op::isSaturatedValueTernary(const_3v_value)))
    {
        return ColumnConst::create(
            buildColumnFromTernaryData(UInt8Container({const_3v_value}), result_type->isNullable()),
            input_rows_count
        );
    }

    /// With constants present the accumulator starts out filled with the folded constant and is
    /// passed to the applier as an input (last argument below).
    const auto result_column = has_consts ?
            ColumnUInt8::create(input_rows_count, const_3v_value) : ColumnUInt8::create(input_rows_count);

    OperationApplier::apply(arguments, result_column->getData(), has_consts);

    return buildColumnFromTernaryData(result_column->getData(), result_type->isNullable());
}


/// Dispatches a binary Op over two columns by probing their concrete types with typeid_cast.
/// NOTE(review): template parameter lists are missing from this text for the declaration, the
/// alias and both definitions below — presumably an Op plus a list of candidate element types.
template
struct TypedExecutorInvoker;

template
using FastApplierImpl = TypedExecutorInvoker;

template
struct TypedExecutorInvoker
{
    /// `x` is already typed: if `y` matches the probed column type, writes
    /// Op::apply(!!a, !!b) for each pair of elements into `result` (starting at result.begin());
    /// otherwise forwards to another TypedExecutorInvoker instantiation.
    template
    static void apply(const ColumnVector & x, const IColumn & y, Result & result)
    {
        if (const auto column = typeid_cast *>(&y))
            std::transform(
                    x.getData().cbegin(), x.getData().cend(),
                    column->getData().cbegin(), result.begin(),
                    [](const auto a, const auto b) { return Op::apply(!!a, !!b); });
        else
            TypedExecutorInvoker::template apply(x, y, result);
    }

    /// `x` is untyped: if it matches the probed column type, restarts the probing for `y` through
    /// FastApplierImpl; otherwise forwards to another TypedExecutorInvoker instantiation.
    template
    static void apply(const IColumn & x, const IColumn & y, Result & result)
    {
        if (const auto column = typeid_cast *>(&x))
            FastApplierImpl::template apply(*column, y, result);
        else
            TypedExecutorInvoker::apply(x, y, result);
    }
};

/// Specialization in which both overloads throw LOGICAL_ERROR with the demangled type name of the
/// column that could not be matched.
template
struct TypedExecutorInvoker
{
    template
    static void apply(const ColumnVector &, const IColumn & y, Result &)
    {
        throw Exception(std::string("Unknown numeric column y of type: ") + demangle(typeid(y).name()), ErrorCodes::LOGICAL_ERROR);
    }

    template
    static void apply(const IColumn & x, const IColumn &, Result &)
    {
        throw Exception(std::string("Unknown numeric column x of type: ") + demangle(typeid(x).name()), ErrorCodes::LOGICAL_ERROR);
    }
};


/// Types of all of the arguments are guaranteed to be non-nullable here
/// Evaluates the operation on plain boolean values and returns either a constant UInt8 column or a
/// full ColumnUInt8. `arguments` is taken by value because constant columns are removed from it.
template
static ColumnPtr basicExecuteImpl(ColumnRawPtrs arguments, size_t input_rows_count)
{
    /// Combine all constant columns into a single constant value.
    UInt8 const_val = 0;
    bool has_consts = extractConstColumnsAsBool(arguments, const_val);

    /// If the constant value uniquely determines the result, return it.
    /// (Either nothing but constants was passed, or Op gives the same answer for 0 and 1.)
    if (has_consts && (arguments.empty() || Op::apply(const_val, 0) == Op::apply(const_val, 1)))
    {
        if (!arguments.empty())
            const_val = Op::apply(const_val, 0);
        return DataTypeUInt8().createColumnConst(input_rows_count, toField(const_val));
    }

    /// If the constant value is a neutral element, let's forget about it.
    if (has_consts && Op::apply(const_val, 0) == 0 && Op::apply(const_val, 1) == 1)
        has_consts = false;

    auto col_res = has_consts ?
            ColumnUInt8::create(input_rows_count, const_val) : ColumnUInt8::create(input_rows_count);

    /// FastPath detection goes in here
    /// Exactly two operands remain: either one column plus the constant-filled result column, or
    /// two columns.
    if (arguments.size() == (has_consts ? 1 : 2))
    {
        if (has_consts)
            FastApplierImpl::apply(*arguments[0], *col_res, col_res->getData());
        else
            FastApplierImpl::apply(*arguments[0], *arguments[1], col_res->getData());

        return col_res;
    }

    /// Convert all columns to UInt8
    /// `uint8_args` stores raw pointers only; `converted_columns_holder` owns the columns created
    /// here so that those pointers stay valid until the applier has run.
    UInt8ColumnPtrs uint8_args;
    Columns converted_columns_holder;
    for (const IColumn * column : arguments)
    {
        if (const auto * uint8_column = checkAndGetColumn(column))
            uint8_args.push_back(uint8_column);
        else
        {
            auto converted_column = ColumnUInt8::create(input_rows_count);
            convertAnyColumnToBool(column, converted_column->getData());
            uint8_args.push_back(converted_column.get());
            converted_columns_holder.emplace_back(std::move(converted_column));
        }
    }

    OperationApplier::apply(uint8_args, col_res->getData(), has_consts);

    return col_res;
}

} /// closes the unnamed namespace opened earlier in this file

/// Validates the argument types and returns the result type.
/// Throws TOO_FEW_ARGUMENTS_FOR_FUNCTION for fewer than two arguments, LOGICAL_ERROR when a
/// nullable argument is seen although Impl::specialImplementationForNulls() is false, and
/// ILLEGAL_TYPE_OF_ARGUMENT when an argument is neither a native number nor (with special NULL
/// handling) only-NULL or a nullable native number.
/// The result type is made nullable iff at least one argument type is nullable.
/// NOTE(review): the template parameter list, the class's template arguments and the type given to
/// std::make_shared are missing from this text.
template
DataTypePtr FunctionAnyArityLogical::getReturnTypeImpl(const DataTypes & arguments) const
{
    if (arguments.size() < 2)
        throw Exception("Number of arguments for function \"" + getName() + "\" should be at least 2: passed "
            + toString(arguments.size()),
            ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION);

    bool has_nullable_arguments = false;
    for (size_t i = 0; i < arguments.size(); ++i)
    {
        const auto & arg_type = arguments[i];

        /// Only evaluated until the first nullable argument has been found.
        if (!has_nullable_arguments)
        {
            has_nullable_arguments = arg_type->isNullable();
            if (has_nullable_arguments && !Impl::specialImplementationForNulls())
                throw Exception("Logical error: Unexpected type of argument for function \"" + getName() + "\": "
                    " argument " + toString(i + 1) + " is of type " + arg_type->getName(), ErrorCodes::LOGICAL_ERROR);
        }

        if (!(isNativeNumber(arg_type)
            || (Impl::specialImplementationForNulls() && (arg_type->onlyNull() || isNativeNumber(removeNullable(arg_type))))))
            throw Exception("Illegal type ("
                + arg_type->getName()
                + ") of " + toString(i + 1) + " argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    auto result_type = std::make_shared();
    return has_nullable_arguments
            ? makeNullable(result_type)
            : result_type;
}

/// Clears null_bytemap[i] for every row whose mask value (negated first when `inverted`) is
/// non-zero; all other rows are left as they are.
/// Both containers are indexed up to mask.size(), so `null_bytemap` must be at least that long.
template
static void applyTernaryLogicImpl(const IColumn::Filter & mask, IColumn::Filter & null_bytemap)
{
    for (size_t i = 0; i != mask.size(); ++i)
    {
        UInt8 value = mask[i];
        if constexpr (inverted)
            value = !value;

        if (null_bytemap[i] && value)
            null_bytemap[i] = 0;
    }
}

/// Selects an applyTernaryLogicImpl variant depending on whether Name is the AND or the OR name;
/// does nothing for any other name.
/// NOTE(review): the template arguments of both calls are missing from this text — presumably the
/// `inverted` flag differs between the two branches; confirm which is which.
template
static void applyTernaryLogic(const IColumn::Filter & mask, IColumn::Filter & null_bytemap)
{
    if (Name::name == NameAnd::name)
        applyTernaryLogicImpl(mask, null_bytemap);
    else if (Name::name == NameOr::name)
        applyTernaryLogicImpl(mask, null_bytemap);
}

/// Short-circuit evaluation for AND / OR: arguments are evaluated left to right, each one only on
/// the rows still selected by the running mask. Throws ILLEGAL_TYPE_OF_ARGUMENT for any other
/// function name. `arguments` is modified (maskedExecute is applied to its elements).
/// Returns a ColumnUInt8 built from the final mask, wrapped into a ColumnNullable together with the
/// collected null bytemap when `result_type` is nullable.
template
ColumnPtr FunctionAnyArityLogical::executeShortCircuit(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{
    if (Name::name != NameAnd::name && Name::name != NameOr::name)
        throw Exception("Function " + getName() + " doesn't support short circuit execution", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    /// Let's denote x_i' = maskedExecute(x_i, mask).
    /// 1) AND(x_0, x_1, x_2, ..., x_n)
    /// We will support mask_i = x_0 & x_1 & ... & x_i.
    /// Base:
    ///     mask_0 is 1 everywhere, x_0' = x_0.
    /// Iteration:
    ///     mask_i = extractMask(mask_{i - 1}, x_{i - 1}')
    ///     x_i' = maskedExecute(x_i, mask)
    /// Also we will treat NULL as 1 if x_i' is Nullable
    /// to support ternary logic.
    /// The result is mask_n.
    ///
    /// 2) OR(x_0, x_1, x_2, ..., x_n)
    /// We will support mask_i = !x_0 & !x_1 & ... & !x_i.
    /// mask_0 is 1 everywhere, x_0' = x_0.
    /// mask = extractMask(mask, !x_{i - 1}')
    /// x_i' = maskedExecute(x_i, mask)
    /// Also we will treat NULL as 0 if x_i' is Nullable
    /// to support ternary logic.
    /// The result is !mask_n.

    bool inverted = Name::name != NameAnd::name;
    UInt8 null_value = UInt8(Name::name == NameAnd::name);
    IColumn::Filter mask(arguments[0].column->size(), 1);

    /// If result is nullable, we need to create null bytemap of the resulting column.
    /// We will fill it while extracting mask from arguments.
    std::unique_ptr nulls;
    if (result_type->isNullable())
        nulls = std::make_unique(arguments[0].column->size(), 0);

    MaskInfo mask_info;
    /// i runs from 1 so that argument i - 1 updates the mask and argument i (if any) is then
    /// executed under it.
    for (size_t i = 1; i <= arguments.size(); ++i)
    {
        if (inverted)
            mask_info = extractInvertedMask(mask, arguments[i - 1].column, nulls.get(), null_value);
        else
            mask_info = extractMask(mask, arguments[i - 1].column, nulls.get(), null_value);

        /// If mask doesn't have ones, we don't need to execute the remaining arguments,
        /// because the result won't change.
        if (!mask_info.has_ones || i == arguments.size())
            break;
        maskedExecute(arguments[i], mask, mask_info);
    }
    /// For OR function we need to inverse mask to get the resulting column.
    if (inverted)
        inverseMask(mask, mask_info);

    if (nulls)
        applyTernaryLogic(mask, *nulls);

    /// The mask (and the null bytemap below) are moved into freshly created columns, not copied.
    MutableColumnPtr res = ColumnUInt8::create();
    typeid_cast(res.get())->getData() = std::move(mask);

    if (!nulls)
        return res;

    MutableColumnPtr bytemap = ColumnUInt8::create();
    typeid_cast(bytemap.get())->getData() = std::move(*nulls);
    return ColumnNullable::create(std::move(res), std::move(bytemap));
}

/// Entry point: uses executeShortCircuit when checkShortCircuitArguments(...) does not return -1;
/// otherwise collects raw column pointers and runs the ternary implementation for a nullable
/// result type or the basic implementation for a non-nullable one.
template
ColumnPtr FunctionAnyArityLogical::executeImpl(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const
{
    /// NOTE(review): `args` is a const reference, so std::move(args) cannot move and this is a
    /// plain copy. A mutable local is needed because executeShortCircuit takes its arguments by
    /// non-const reference; writing `= args` would state the intent more honestly.
    ColumnsWithTypeAndName arguments = std::move(args);

    /// Special implementation for short-circuit arguments.
    if (checkShortCircuitArguments(arguments) != -1)
        return executeShortCircuit(arguments, result_type);

    ColumnRawPtrs args_in;
    for (const auto & arg_index : arguments)
        args_in.push_back(arg_index.column.get());

    if (result_type->isNullable())
        return executeForTernaryLogicImpl(std::move(args_in), result_type, input_rows_count);
    else
        return basicExecuteImpl(std::move(args_in), input_rows_count);
}

/// Returns a constant result column when one constant argument alone decides the result, and a
/// null ColumnPtr otherwise (including always for a non-saturable Impl).
/// NOTE(review): the type arguments of the casts, of Field::get and of std::is_same_v are missing
/// from this text.
template
ColumnPtr FunctionAnyArityLogical::getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{
    /** Try to perform optimization for saturable functions (AndFunction, OrFunction) in case some arguments are
      * constants.
      * If function is not saturable (XorFunction) we cannot perform such optimization.
      * If function is AndFunction and in arguments there is constant false, result is false.
      * If function is OrFunction and in arguments there is constant true, result is true.
      */
    if constexpr (!Impl::isSaturable())
        return nullptr;

    bool has_true_constant = false;
    bool has_false_constant = false;

    for (const auto & argument : arguments)
    {
        ColumnPtr column = argument.column;

        /// Arguments without a column, and non-constant columns, are skipped.
        if (!column || !isColumnConst(*column))
            continue;

        DataTypePtr non_nullable_type = removeNullable(argument.type);
        TypeIndex data_type_index = non_nullable_type->getTypeId();

        if (!isNativeNumber(data_type_index))
            continue;

        const ColumnConst * const_column = static_cast(column.get());

        Field constant_field_value = const_column->getField();
        /// A constant NULL decides nothing here.
        if (constant_field_value.isNull())
            continue;

        auto field_type = constant_field_value.getType();

        /// A field whose type is none of the three handled below keeps the value false and is
        /// therefore counted as a false constant.
        /// NOTE(review): presumably unreachable after the isNativeNumber check — confirm.
        bool constant_value_bool = false;

        if (field_type == Field::Types::Float64)
            constant_value_bool = static_cast(constant_field_value.get());
        else if (field_type == Field::Types::Int64)
            constant_value_bool = static_cast(constant_field_value.get());
        else if (field_type == Field::Types::UInt64)
            constant_value_bool = static_cast(constant_field_value.get());

        has_true_constant = has_true_constant || constant_value_bool;
        has_false_constant = has_false_constant || !constant_value_bool;
    }

    ColumnPtr result_column;

    /// The constant columns below are created with a size of 0.
    if constexpr (std::is_same_v)
    {
        if (has_false_constant)
            result_column = result_type->createColumnConst(0, static_cast(false));
    }
    else if constexpr (std::is_same_v)
    {
        if (has_true_constant)
            result_column = result_type->createColumnConst(0, static_cast(true));
    }

    return result_column;
}

/// Element-wise unary operation over a column's data container.
/// NOTE(review): the template parameter list and the ColumnVector element types are missing from
/// this text.
template
struct UnaryOperationImpl
{
    using ResultType = typename Op::ResultType;
    using ArrayA = typename ColumnVector::Container;
    using ArrayC = typename ColumnVector::Container;

    /// Writes Op::apply(x) for every element x of `a` into `c`, starting at c.begin().
    /// `c` is not resized here, so the caller must have sized it to at least a.size().
    static void NO_INLINE vector(const ArrayA & a, ArrayC & c)
    {
        std::transform(
            a.cbegin(), a.cend(), c.begin(),
            [](const auto x) { return Op::apply(x); });
    }
};

/// NOTE(review): the declaration introduced by the `template` keyword below continues beyond the
/// text visible here.
template