ClickHouse/src/Functions/vectorFunctions.cpp

527 lines
22 KiB
C++
Raw Normal View History

#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/TupleIFunction.h>
#include <Functions/castTypeToEither.h>
namespace DB
{
namespace ErrorCodes
{
2021-08-20 21:14:26 +00:00
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
class FunctionTuplePlus : public TupleIFunction
{
private:
ContextPtr context;
public:
static constexpr auto name = "tuplePlus";
explicit FunctionTuplePlus(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTuplePlus>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
2021-08-20 16:23:51 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);
if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());
size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto plus = FunctionFactory::instance().get("plus", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_plus = plus->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_plus->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
return std::make_shared<DataTypeTuple>(types);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
auto plus = FunctionFactory::instance().get("plus", context);
Columns columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_plus = plus->build(ColumnsWithTypeAndName{left, right});
columns[i] = elem_plus->execute({left, right}, elem_plus->getResultType(), input_rows_count);
}
return ColumnTuple::create(columns);
}
};
class FunctionTupleMinus : public TupleIFunction
{
private:
ContextPtr context;
public:
static constexpr auto name = "tupleMinus";
explicit FunctionTupleMinus(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTupleMinus>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
2021-08-20 16:23:51 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);
if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());
size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto minus = FunctionFactory::instance().get("minus", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_minus = minus->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_minus->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
return std::make_shared<DataTypeTuple>(types);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
auto minus = FunctionFactory::instance().get("minus", context);
Columns columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_minus = minus->build(ColumnsWithTypeAndName{left, right});
columns[i] = elem_minus->execute({left, right}, elem_minus->getResultType(), input_rows_count);
}
return ColumnTuple::create(columns);
}
};
class FunctionTupleMultiply : public TupleIFunction
{
private:
ContextPtr context;
public:
static constexpr auto name = "tupleMultiply";
explicit FunctionTupleMultiply(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTupleMultiply>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
2021-08-20 16:23:51 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);
if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());
size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto multiply = FunctionFactory::instance().get("multiply", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_multiply = multiply->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_multiply->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
return std::make_shared<DataTypeTuple>(types);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
auto multiply = FunctionFactory::instance().get("multiply", context);
Columns columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_multiply = multiply->build(ColumnsWithTypeAndName{left, right});
columns[i] = elem_multiply->execute({left, right}, elem_multiply->getResultType(), input_rows_count);
}
return ColumnTuple::create(columns);
}
};
2021-08-23 13:41:20 +00:00
class FunctionTupleDivide : public TupleIFunction
{
private:
ContextPtr context;
public:
static constexpr auto name = "tupleDivide";
explicit FunctionTupleDivide(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTupleDivide>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);
if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());
size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto divide = FunctionFactory::instance().get("divide", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_divide = divide->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_divide->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
return std::make_shared<DataTypeTuple>(types);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
auto divide = FunctionFactory::instance().get("divide", context);
Columns columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_divide = divide->build(ColumnsWithTypeAndName{left, right});
columns[i] = elem_divide->execute({left, right}, elem_divide->getResultType(), input_rows_count);
}
return ColumnTuple::create(columns);
}
};
class FunctionTupleNegate : public TupleIFunction
{
private:
ContextPtr context;
public:
static constexpr auto name = "tupleNegate";
explicit FunctionTupleNegate(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTupleNegate>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * cur_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
if (!cur_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
const auto & cur_types = cur_tuple->getElements();
Columns cur_elements;
if (arguments[0].column)
cur_elements = getTupleElements(*arguments[0].column);
size_t tuple_size = cur_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto negate = FunctionFactory::instance().get("negate", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName cur{cur_elements.empty() ? nullptr : cur_elements[i], cur_types[i], {}};
auto elem_negate = negate->build(ColumnsWithTypeAndName{cur});
types[i] = elem_negate->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
return std::make_shared<DataTypeTuple>(types);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * cur_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto & cur_types = cur_tuple->getElements();
auto cur_elements = getTupleElements(*arguments[0].column);
size_t tuple_size = cur_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
auto negate = FunctionFactory::instance().get("negate", context);
Columns columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ColumnWithTypeAndName cur{cur_elements[i], cur_types[i], {}};
auto elem_negate= negate->build(ColumnsWithTypeAndName{cur});
columns[i] = elem_negate->execute({cur}, elem_negate->getResultType(), input_rows_count);
}
return ColumnTuple::create(columns);
}
};
void registerVectorFunctions(FunctionFactory & factory)
{
factory.registerFunction<FunctionTuplePlus>();
factory.registerAlias("vectorSum", FunctionTuplePlus::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionTupleMinus>();
factory.registerAlias("vectorDifference", FunctionTupleMinus::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionTupleMultiply>();
2021-08-23 13:41:20 +00:00
factory.registerFunction<FunctionTupleDivide>();
factory.registerFunction<FunctionTupleNegate>();
/*factory.registerFunction<FunctionTupleMultiplyByNumber>();
factory.registerFunction<FunctionTupleDivideByNumber>();
factory.registerFunction<FunctionDotProduct>();
factory.registerAlias("scalarProduct", FunctionDotProduct::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionL1Norm>();
2021-08-23 13:41:20 +00:00
factory.registerFunction<FunctionL1Distance>();
factory.registerFunction<FunctionL1Normalize>();
factory.registerFunction<FunctionL2Norm>();
2021-08-23 13:41:20 +00:00
factory.registerFunction<FunctionL2Distance>();
factory.registerFunction<FunctionL2Normalize>();
factory.registerFunction<FunctionLinfNorm>();
2021-08-23 13:41:20 +00:00
factory.registerFunction<FunctionLinfDistance>();
factory.registerFunction<FunctionLinfNormalize>();
factory.registerFunction<FunctionLpNorm>();
2021-08-23 13:41:20 +00:00
factory.registerFunction<FunctionLpDistance>();
factory.registerFunction<FunctionLpNormalize>();
factory.registerFunction<FunctionCosineDistance>();*/
}
}