2011-08-09 15:57:33 +00:00
|
|
|
#pragma once
|
|
|
|
|
2016-05-28 15:42:22 +00:00
|
|
|
#include <memory>
|
2011-08-09 19:19:00 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Names.h>
|
2017-06-13 02:06:53 +00:00
|
|
|
#include <Core/Field.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Block.h>
|
|
|
|
#include <Core/ColumnNumbers.h>
|
|
|
|
#include <DataTypes/IDataType.h>
|
2011-08-09 15:57:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
/// Error codes referenced by the default implementations in this header.
/// They are only declared here; the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
2017-06-13 04:33:07 +00:00
|
|
|
/// Forward declaration: this header only refers to the type as `std::vector<ExpressionAction> &`
/// in function signatures, so the full definition is not required here.
struct ExpressionAction;
|
|
|
|
|
2015-05-04 17:52:19 +00:00
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/** Interface for normal functions.
|
|
|
|
* Normal functions are functions that do not change the number of rows in the table,
|
|
|
|
* and the result of which for each row does not depend on other rows.
|
2011-08-09 15:57:33 +00:00
|
|
|
*
|
2017-05-28 14:32:59 +00:00
|
|
|
* A function can take an arbitrary number of arguments; returns exactly one value.
|
|
|
|
* The type of the result depends on the type and number of arguments.
|
2011-08-09 15:57:33 +00:00
|
|
|
*
|
2017-05-28 14:32:59 +00:00
|
|
|
* The function is dispatched for the whole block. This allows you to perform all kinds of checks rarely,
|
|
|
|
* and do the main job as an efficient loop.
|
2011-08-09 15:57:33 +00:00
|
|
|
*
|
2017-05-28 14:32:59 +00:00
|
|
|
* The function is applied to one or more columns of the block, and writes its result,
|
|
|
|
* adding a new column to the block. The function does not modify its arguments.
|
2011-08-09 15:57:33 +00:00
|
|
|
*/
|
|
|
|
class IFunction
|
|
|
|
{
|
|
|
|
public:
|
2017-05-28 14:32:59 +00:00
|
|
|
/** The successor of IFunction must implement:
|
2017-04-01 07:20:54 +00:00
|
|
|
* - getName
|
2017-05-28 14:32:59 +00:00
|
|
|
* - either getReturnType, or getReturnTypeAndPrerequisites
|
|
|
|
* - one of the overloads of `execute`.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/// Get the main function name.
|
2017-04-01 07:20:54 +00:00
|
|
|
virtual String getName() const = 0;
|
|
|
|
|
|
|
|
/// Override and return true if function could take different number of arguments.
|
|
|
|
virtual bool isVariadic() const { return false; }
|
|
|
|
|
|
|
|
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
|
|
|
|
virtual size_t getNumberOfArguments() const = 0;
|
|
|
|
|
|
|
|
/// Throw if number of arguments is incorrect. Default implementation will check only in non-variadic case.
|
|
|
|
/// It is called inside getReturnType.
|
|
|
|
virtual void checkNumberOfArguments(size_t number_of_arguments) const;
|
|
|
|
|
|
|
|
/** Should we evaluate this function while constant folding, if arguments are constants?
|
|
|
|
* Usually this is true. Notable counterexample is function 'sleep'.
|
|
|
|
* If we will call it during query analysis, we will sleep extra amount of time.
|
|
|
|
*/
|
|
|
|
virtual bool isSuitableForConstantFolding() const { return true; }
|
|
|
|
|
|
|
|
/** Function is called "injective" if it returns different result for different values of arguments.
|
|
|
|
* Example: hex, negate, tuple...
|
|
|
|
*
|
|
|
|
* Function could be injective with some arguments fixed to some constant values.
|
|
|
|
* Examples:
|
|
|
|
* plus(const, x);
|
|
|
|
* multiply(const, x) where x is an integer and constant is not divisable by two;
|
|
|
|
* concat(x, 'const');
|
|
|
|
* concat(x, 'const', y) where const contain at least one non-numeric character;
|
|
|
|
* concat with FixedString
|
|
|
|
* dictGet... functions takes name of dictionary as its argument,
|
|
|
|
* and some dictionaries could be explicitly defined as injective.
|
|
|
|
*
|
|
|
|
* It could be used, for example, to remove useless function applications from GROUP BY.
|
|
|
|
*
|
|
|
|
* Sometimes, function is not really injective, but considered as injective, for purpose of query optimization.
|
|
|
|
* For example, toString function is not injective for Float64 data type,
|
|
|
|
* as it returns 'nan' for many different representation of NaNs.
|
|
|
|
* But we assume, that it is injective. This could be documented as implementation-specific behaviour.
|
|
|
|
*
|
|
|
|
* sample_block should contain data types of arguments and values of constants, if relevant.
|
|
|
|
*/
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual bool isInjective(const Block & /*sample_block*/) { return false; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/** Function is called "deterministic", if it returns same result for same values of arguments.
|
|
|
|
* Most of functions are deterministic. Notable counterexample is rand().
|
|
|
|
* Sometimes, functions are "deterministic" in scope of single query
|
|
|
|
* (even for distributed query), but not deterministic it general.
|
|
|
|
* Example: now(). Another example: functions that work with periodically updated dictionaries.
|
|
|
|
*/
|
|
|
|
virtual bool isDeterministicInScopeOfQuery() { return true; }
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/// Get the result type by argument type. If the function does not apply to these arguments, throw an exception.
|
|
|
|
/// Overloading for those who do not need prerequisites and values of constant arguments. Not called from outside.
|
2017-04-01 07:20:54 +00:00
|
|
|
DataTypePtr getReturnType(const DataTypes & arguments) const;
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("getReturnType is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/** Get the result type by argument types and constant argument values.
|
|
|
|
* If the function does not apply to these arguments, throw an exception.
|
|
|
|
* You can also return a description of the additional columns that are required to perform the function.
|
|
|
|
* For non-constant columns `arguments[i].column = nullptr`.
|
|
|
|
* Meaningful element types in out_prerequisites: APPLY_FUNCTION, ADD_COLUMN.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void getReturnTypeAndPrerequisites(
|
|
|
|
const ColumnsWithTypeAndName & arguments,
|
|
|
|
DataTypePtr & out_return_type,
|
|
|
|
std::vector<ExpressionAction> & out_prerequisites);
|
|
|
|
|
|
|
|
virtual void getReturnTypeAndPrerequisitesImpl(
|
|
|
|
const ColumnsWithTypeAndName & arguments,
|
|
|
|
DataTypePtr & out_return_type,
|
2017-12-01 19:34:51 +00:00
|
|
|
std::vector<ExpressionAction> & /*out_prerequisites*/)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
DataTypes types(arguments.size());
|
|
|
|
for (size_t i = 0; i < arguments.size(); ++i)
|
|
|
|
types[i] = arguments[i].type;
|
|
|
|
out_return_type = getReturnTypeImpl(types);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// For higher-order functions (functions, that have lambda expression as at least one argument).
|
|
|
|
/// You pass data types with empty DataTypeExpression for lambda arguments.
|
|
|
|
/// This function will replace it with DataTypeExpression containing actual types.
|
|
|
|
void getLambdaArgumentTypes(DataTypes & arguments) const;
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual void getLambdaArgumentTypesImpl(DataTypes & /*arguments*/) const
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
}
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/// Execute the function on the block. Note: can be called simultaneously from several threads, for one object.
|
|
|
|
/// Overloading for those who do not need `prerequisites`. Not called from outside.
|
2017-04-01 07:20:54 +00:00
|
|
|
void execute(Block & block, const ColumnNumbers & arguments, size_t result);
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/// Execute the function above the block. Note: can be called simultaneously from several threads, for one object.
|
|
|
|
/// `prerequisites` go in the same order as `out_prerequisites` obtained from getReturnTypeAndPrerequisites.
|
2017-04-01 07:20:54 +00:00
|
|
|
void execute(Block & block, const ColumnNumbers & arguments, const ColumnNumbers & prerequisites, size_t result);
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual void executeImpl(Block & /*block*/, const ColumnNumbers & /*arguments*/, size_t /*result*/)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("executeImpl is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual void executeImpl(Block & block, const ColumnNumbers & arguments, const ColumnNumbers & /*prerequisites*/, size_t result)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
executeImpl(block, arguments, result);
|
|
|
|
}
|
|
|
|
|
2017-07-24 01:00:31 +00:00
|
|
|
/** Default implementation in presense of Nullable arguments or NULL constants as arguments is the following:
|
|
|
|
* if some of arguments are NULL constants then return NULL constant,
|
|
|
|
* if some of arguments are Nullable, then execute function as usual for block,
|
|
|
|
* where Nullable columns are substituted with nested columns (they have arbitary values in rows corresponding to NULL value)
|
|
|
|
* and wrap result in Nullable column where NULLs are in all rows where any of arguments are NULL.
|
|
|
|
*/
|
|
|
|
virtual bool useDefaultImplementationForNulls() const { return true; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-23 08:40:43 +00:00
|
|
|
/** If the function have non-zero number of arguments,
|
|
|
|
* and if all arguments are constant, that we could automatically provide default implementation:
|
|
|
|
* arguments are converted to ordinary columns with single value, then function is executed as usual,
|
|
|
|
* and then the result is converted to constant column.
|
|
|
|
*/
|
|
|
|
virtual bool useDefaultImplementationForConstants() const { return false; }
|
|
|
|
|
|
|
|
/** Some arguments could remain constant during this implementation.
|
|
|
|
*/
|
|
|
|
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
|
|
|
|
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/** Lets you know if the function is monotonic in a range of values.
|
|
|
|
* This is used to work with the index in a sorted chunk of data.
|
|
|
|
* And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`.
|
|
|
|
* All this is considered only for functions of one argument.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
virtual bool hasInformationAboutMonotonicity() const { return false; }
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/// The property of monotonicity for a certain range.
|
2017-04-01 07:20:54 +00:00
|
|
|
struct Monotonicity
|
|
|
|
{
|
2017-05-28 14:32:59 +00:00
|
|
|
bool is_monotonic = false; /// Is the function monotonous (nondecreasing or nonincreasing).
|
|
|
|
bool is_positive = true; /// true if the function is nondecreasing, false, if notincreasing. If is_monotonic = false, then it does not matter.
|
PKCondition: infer index use with pk subexpression
By default only constraints explicitly matching
primary key expression (or expression wrapped in
a monotonic function) are eligible for part and
range selection. So for example, if index is:
(toStartOfHour(dt), UserID)
Then a query such as this resorts to full scan:
SELECT count() FROM t WHERE dt = now()
Intuitively, only parts with toStartOfHour(now())
could be selected, but it is less trivial to prove.
The primary key currently can be wrapped in a chain
of monotonic functions, so following would work:
toStartOfHour(dt) = toStartOfHour(now()) AND dt = now()
It must be however explicitly stated, if we wanted
to infer that we’d have to know the inverse function,
and prove that the inverse function is monotonic
on given interval. This is not practical as
there is no inverse function that for example undos
rounding, it isn’t strictly monotonic.
There are however functions that don’t transform
output range and preserve monotonicity on the
complete input range, such as rounding or casts
to a same or wider numeric type. This eliminates
the need to find inverse function, as no check for monotonicity over arbitrary interval is needed,
and thus makes this optimisation possible.
2017-07-06 05:39:05 +00:00
|
|
|
bool is_always_monotonic = false; /// Is true if function is monotonic on the whole input range I
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-09 14:08:50 +00:00
|
|
|
Monotonicity(bool is_monotonic_ = false, bool is_positive_ = true, bool is_always_monotonic_ = false)
|
PKCondition: infer index use with pk subexpression
By default only constraints explicitly matching
primary key expression (or expression wrapped in
a monotonic function) are eligible for part and
range selection. So for example, if index is:
(toStartOfHour(dt), UserID)
Then a query such as this resorts to full scan:
SELECT count() FROM t WHERE dt = now()
Intuitively, only parts with toStartOfHour(now())
could be selected, but it is less trivial to prove.
The primary key currently can be wrapped in a chain
of monotonic functions, so following would work:
toStartOfHour(dt) = toStartOfHour(now()) AND dt = now()
It must be however explicitly stated, if we wanted
to infer that we’d have to know the inverse function,
and prove that the inverse function is monotonic
on given interval. This is not practical as
there is no inverse function that for example undos
rounding, it isn’t strictly monotonic.
There are however functions that don’t transform
output range and preserve monotonicity on the
complete input range, such as rounding or casts
to a same or wider numeric type. This eliminates
the need to find inverse function, as no check for monotonicity over arbitrary interval is needed,
and thus makes this optimisation possible.
2017-07-06 05:39:05 +00:00
|
|
|
: is_monotonic(is_monotonic_), is_positive(is_positive_), is_always_monotonic(is_always_monotonic_) {}
|
2017-04-01 07:20:54 +00:00
|
|
|
};
|
|
|
|
|
2017-05-28 14:32:59 +00:00
|
|
|
/** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity.
|
|
|
|
* NULL can be passed as one of the arguments. This means that the corresponding range is unlimited on the left or on the right.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2017-12-01 19:34:51 +00:00
|
|
|
virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual ~IFunction() {}
|
2011-08-09 15:57:33 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-05-28 15:42:22 +00:00
|
|
|
/// Shared-ownership pointer to a function object.
using FunctionPtr = std::shared_ptr<IFunction>;
|
2011-08-09 19:19:00 +00:00
|
|
|
|
|
|
|
|
2011-08-09 15:57:33 +00:00
|
|
|
}
|