2021-05-17 07:30:42 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2018-09-08 22:04:39 +00:00
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2021-06-15 09:52:34 +00:00
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2018-09-08 22:04:39 +00:00
|
|
|
#include <Columns/ColumnConst.h>
|
2020-05-14 18:21:35 +00:00
|
|
|
#include <Columns/ColumnsNumber.h>
|
2018-09-08 22:04:39 +00:00
|
|
|
#include <Columns/ColumnTuple.h>
|
|
|
|
#include <Columns/ColumnSet.h>
|
2021-06-15 09:52:34 +00:00
|
|
|
#include <Columns/ColumnLowCardinality.h>
|
2018-09-08 22:04:39 +00:00
|
|
|
#include <Interpreters/Set.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes thrown by FunctionIn::executeImpl. Only declared here (extern).
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2018-09-08 22:04:39 +00:00
|
|
|
/** in(x, set) - function for evaluating the IN operator.
  * notIn(x, set) - and NOT IN.
  */
|
|
|
|
|
2020-05-14 14:21:38 +00:00
|
|
|
/** Maps a combination of compile-time flags to the SQL name of the function.
  *
  * The name is composed as: ["global"] ["not"] ["null"] "in" ["IgnoreSet"], where
  *  negative == true         adds "not",
  *  global == true           adds "global",
  *  null_is_skipped == false adds "null",
  *  ignore_set == true       adds the "IgnoreSet" suffix.
  *
  * Only the explicit specializations below are defined; the primary template is intentionally left incomplete.
  */
template <bool negative, bool global, bool null_is_skipped, bool ignore_set>
struct FunctionInName;

/// Variants that use the set.
template <> struct FunctionInName<false, false, true,  false> { static constexpr auto name = "in"; };
template <> struct FunctionInName<false, true,  true,  false> { static constexpr auto name = "globalIn"; };
template <> struct FunctionInName<true,  false, true,  false> { static constexpr auto name = "notIn"; };
template <> struct FunctionInName<true,  true,  true,  false> { static constexpr auto name = "globalNotIn"; };
template <> struct FunctionInName<false, false, false, false> { static constexpr auto name = "nullIn"; };
template <> struct FunctionInName<false, true,  false, false> { static constexpr auto name = "globalNullIn"; };
template <> struct FunctionInName<true,  false, false, false> { static constexpr auto name = "notNullIn"; };
template <> struct FunctionInName<true,  true,  false, false> { static constexpr auto name = "globalNotNullIn"; };

/// Variants that ignore the set (-IgnoreSet).
template <> struct FunctionInName<false, false, true,  true> { static constexpr auto name = "inIgnoreSet"; };
template <> struct FunctionInName<false, true,  true,  true> { static constexpr auto name = "globalInIgnoreSet"; };
template <> struct FunctionInName<true,  false, true,  true> { static constexpr auto name = "notInIgnoreSet"; };
template <> struct FunctionInName<true,  true,  true,  true> { static constexpr auto name = "globalNotInIgnoreSet"; };
template <> struct FunctionInName<false, false, false, true> { static constexpr auto name = "nullInIgnoreSet"; };
template <> struct FunctionInName<false, true,  false, true> { static constexpr auto name = "globalNullInIgnoreSet"; };
template <> struct FunctionInName<true,  false, false, true> { static constexpr auto name = "notNullInIgnoreSet"; };
template <> struct FunctionInName<true,  true,  false, true> { static constexpr auto name = "globalNotNullInIgnoreSet"; };
|
|
|
|
|
|
|
|
template <bool negative, bool global, bool null_is_skipped, bool ignore_set>
|
2018-09-08 22:04:39 +00:00
|
|
|
class FunctionIn : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
2020-05-14 14:21:38 +00:00
|
|
|
/// ignore_set flag means that we don't use set from the second argument, just return zero column.
|
|
|
|
/// It is needed to perform type analysis without creation of set.
|
|
|
|
static constexpr auto name = FunctionInName<negative, global, null_is_skipped, ignore_set>::name;
|
|
|
|
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr)
|
2018-09-08 22:04:39 +00:00
|
|
|
{
|
|
|
|
return std::make_shared<FunctionIn>();
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
{
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
2021-06-15 09:52:34 +00:00
|
|
|
/// Do not use default implementation for LowCardinality.
|
|
|
|
/// For now, Set may be const or non const column, depending on how it was created.
|
|
|
|
/// But we will return UInt8 for any case.
|
|
|
|
/// TODO: we could use special implementation later.
|
|
|
|
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
|
|
|
|
2018-09-08 22:04:39 +00:00
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
|
|
|
{
|
|
|
|
return std::make_shared<DataTypeUInt8>();
|
|
|
|
}
|
|
|
|
|
2020-05-15 09:06:21 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override
|
|
|
|
{
|
|
|
|
/// Never return constant for -IgnoreSet functions to avoid constant folding.
|
|
|
|
return !ignore_set;
|
|
|
|
}
|
2019-10-27 18:12:40 +00:00
|
|
|
|
2020-04-06 13:30:16 +00:00
|
|
|
bool useDefaultImplementationForNulls() const override { return null_is_skipped; }
|
|
|
|
|
2021-06-22 16:21:23 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
2021-04-29 14:48:26 +00:00
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, [[maybe_unused]] size_t input_rows_count) const override
|
2018-09-08 22:04:39 +00:00
|
|
|
{
|
2020-05-14 14:21:38 +00:00
|
|
|
if constexpr (ignore_set)
|
2020-10-19 13:42:14 +00:00
|
|
|
return ColumnUInt8::create(input_rows_count, 0u);
|
2022-12-22 08:20:25 +00:00
|
|
|
if (input_rows_count == 0)
|
|
|
|
return ColumnUInt8::create();
|
2019-08-12 13:07:35 +00:00
|
|
|
|
2018-09-08 22:04:39 +00:00
|
|
|
/// Second argument must be ColumnSet.
|
2020-10-19 13:42:14 +00:00
|
|
|
ColumnPtr column_set_ptr = arguments[1].column;
|
2019-10-27 18:12:40 +00:00
|
|
|
const ColumnSet * column_set = checkAndGetColumnConstData<const ColumnSet>(column_set_ptr.get());
|
|
|
|
if (!column_set)
|
|
|
|
column_set = checkAndGetColumn<const ColumnSet>(column_set_ptr.get());
|
2018-09-08 22:04:39 +00:00
|
|
|
if (!column_set)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function '{}' must be Set; found {}",
|
|
|
|
getName(), column_set_ptr->getName());
|
2018-09-08 22:04:39 +00:00
|
|
|
|
2021-08-18 10:30:02 +00:00
|
|
|
ColumnsWithTypeAndName columns_of_key_columns;
|
2018-09-08 22:04:39 +00:00
|
|
|
|
2019-04-30 17:26:56 +00:00
|
|
|
/// First argument may be a tuple or a single column.
|
2020-10-19 13:42:14 +00:00
|
|
|
const ColumnWithTypeAndName & left_arg = arguments[0];
|
2018-09-08 22:04:39 +00:00
|
|
|
const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(left_arg.column.get());
|
|
|
|
const ColumnConst * const_tuple = checkAndGetColumnConst<ColumnTuple>(left_arg.column.get());
|
|
|
|
const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(left_arg.type.get());
|
|
|
|
|
|
|
|
ColumnPtr materialized_tuple;
|
|
|
|
if (const_tuple)
|
|
|
|
{
|
|
|
|
materialized_tuple = const_tuple->convertToFullColumn();
|
|
|
|
tuple = typeid_cast<const ColumnTuple *>(materialized_tuple.get());
|
|
|
|
}
|
|
|
|
|
|
|
|
auto set = column_set->getData();
|
2023-04-18 15:11:19 +00:00
|
|
|
if (!set)
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not-ready Set passed as the second argument for function '{}'", getName());
|
|
|
|
|
2018-09-08 22:04:39 +00:00
|
|
|
auto set_types = set->getDataTypes();
|
2022-02-23 14:31:53 +00:00
|
|
|
|
|
|
|
if (tuple && set_types.size() != 1 && set_types.size() == tuple->tupleSize())
|
2018-09-08 22:04:39 +00:00
|
|
|
{
|
2019-03-25 01:43:54 +00:00
|
|
|
const auto & tuple_columns = tuple->getColumns();
|
2018-09-08 22:04:39 +00:00
|
|
|
const DataTypes & tuple_types = type_tuple->getElements();
|
|
|
|
size_t tuple_size = tuple_columns.size();
|
|
|
|
for (size_t i = 0; i < tuple_size; ++i)
|
2021-08-18 10:30:02 +00:00
|
|
|
columns_of_key_columns.emplace_back(tuple_columns[i], tuple_types[i], "_" + toString(i));
|
2018-09-08 22:04:39 +00:00
|
|
|
}
|
|
|
|
else
|
2021-08-18 10:30:02 +00:00
|
|
|
columns_of_key_columns.emplace_back(left_arg);
|
2018-09-08 22:04:39 +00:00
|
|
|
|
2021-06-15 09:59:02 +00:00
|
|
|
/// Replace single LowCardinality column to it's dictionary if possible.
|
2021-06-15 09:52:34 +00:00
|
|
|
ColumnPtr lc_indexes = nullptr;
|
2022-12-22 08:20:25 +00:00
|
|
|
bool is_const = false;
|
2021-08-18 10:30:02 +00:00
|
|
|
if (columns_of_key_columns.size() == 1)
|
2021-06-15 09:52:34 +00:00
|
|
|
{
|
2021-08-18 10:30:02 +00:00
|
|
|
auto & arg = columns_of_key_columns.at(0);
|
2021-06-15 09:52:34 +00:00
|
|
|
const auto * col = arg.column.get();
|
|
|
|
if (const auto * const_col = typeid_cast<const ColumnConst *>(col))
|
2022-12-22 08:20:25 +00:00
|
|
|
{
|
2021-06-15 09:52:34 +00:00
|
|
|
col = &const_col->getDataColumn();
|
2022-12-22 08:20:25 +00:00
|
|
|
is_const = true;
|
|
|
|
}
|
2021-06-15 09:52:34 +00:00
|
|
|
|
|
|
|
if (const auto * lc = typeid_cast<const ColumnLowCardinality *>(col))
|
|
|
|
{
|
|
|
|
lc_indexes = lc->getIndexesPtr();
|
|
|
|
arg.column = lc->getDictionary().getNestedColumn();
|
|
|
|
arg.type = removeLowCardinality(arg.type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto res = set->execute(columns_of_key_columns, negative);
|
|
|
|
|
|
|
|
if (lc_indexes)
|
2022-12-22 08:20:25 +00:00
|
|
|
res = res->index(*lc_indexes, 0);
|
2022-12-23 02:06:43 +00:00
|
|
|
|
2022-12-22 08:20:25 +00:00
|
|
|
if (is_const)
|
|
|
|
res = ColumnUInt8::create(input_rows_count, res->getUInt(0));
|
|
|
|
|
|
|
|
if (res->size() != input_rows_count)
|
2022-12-23 02:25:37 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Output size is different from input size, expect {}, get {}", input_rows_count, res->size());
|
2021-06-15 09:52:34 +00:00
|
|
|
|
|
|
|
return res;
|
2018-09-08 22:04:39 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-05-14 14:21:38 +00:00
|
|
|
template<bool ignore_set>
|
2020-09-07 18:00:37 +00:00
|
|
|
void registerFunctionsInImpl(FunctionFactory & factory)
|
2020-05-14 14:21:38 +00:00
|
|
|
{
|
|
|
|
factory.registerFunction<FunctionIn<false, false, true, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<false, true, true, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<true, false, true, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<true, true, true, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<false, false, false, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<false, true, false, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<true, false, false, ignore_set>>();
|
|
|
|
factory.registerFunction<FunctionIn<true, true, false, ignore_set>>();
|
|
|
|
}
|
2018-09-08 22:04:39 +00:00
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
}
|
|
|
|
|
2022-07-04 07:01:39 +00:00
|
|
|
/// Registers both the regular IN functions and their -IgnoreSet counterparts.
REGISTER_FUNCTION(In)
{
    registerFunctionsInImpl<false>(factory);
    registerFunctionsInImpl<true>(factory);
}
|
|
|
|
|
|
|
|
}
|