mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge pull request #50951 from ZhiguoZh/20230607-toyear-fix
Optimization of predicates with toYear/toYYYYMM based on a general solution
This commit is contained in:
commit
6bbd0d144d
@ -322,6 +322,7 @@ struct ToTimeImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToDateImpl;
|
||||
};
|
||||
@ -393,6 +394,7 @@ struct ToStartOfSecondImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -440,6 +442,7 @@ struct ToStartOfMillisecondImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -483,6 +486,7 @@ struct ToStartOfMicrosecondImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -520,6 +524,7 @@ struct ToStartOfNanosecondImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -718,6 +723,28 @@ struct ToYearImpl
|
||||
return time_zone.toYear(DayNum(d));
|
||||
}
|
||||
|
||||
static inline constexpr bool hasPreimage() { return true; }
|
||||
|
||||
static inline RangeOrNull getPreimage(const IDataType & type, const Field & point)
|
||||
{
|
||||
if (point.getType() != Field::Types::UInt64) return std::nullopt;
|
||||
|
||||
auto year = point.get<UInt64>();
|
||||
if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt;
|
||||
|
||||
const DateLUTImpl & date_lut = DateLUT::instance();
|
||||
|
||||
auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0);
|
||||
auto end_time = date_lut.addYears(start_time, 1);
|
||||
|
||||
if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type))
|
||||
return {std::make_pair(Field(start_time), Field(end_time))};
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
|
||||
type.getName(), name);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
@ -791,6 +818,7 @@ struct ToQuarterImpl
|
||||
{
|
||||
return time_zone.toQuarter(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToStartOfYearImpl;
|
||||
};
|
||||
@ -815,6 +843,7 @@ struct ToMonthImpl
|
||||
{
|
||||
return time_zone.toMonth(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToStartOfYearImpl;
|
||||
};
|
||||
@ -840,6 +869,7 @@ struct ToDayOfMonthImpl
|
||||
return time_zone.toDayOfMonth(DayNum(d));
|
||||
}
|
||||
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
using FactorTransform = ToStartOfMonthImpl;
|
||||
};
|
||||
|
||||
@ -887,6 +917,7 @@ struct ToDayOfYearImpl
|
||||
{
|
||||
return time_zone.toDayOfYear(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToStartOfYearImpl;
|
||||
};
|
||||
@ -911,6 +942,7 @@ struct ToHourImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToDateImpl;
|
||||
};
|
||||
@ -939,6 +971,7 @@ struct TimezoneOffsetImpl
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
using FactorTransform = ToTimeImpl;
|
||||
};
|
||||
|
||||
@ -962,6 +995,7 @@ struct ToMinuteImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToStartOfHourImpl;
|
||||
};
|
||||
@ -986,6 +1020,7 @@ struct ToSecondImpl
|
||||
{
|
||||
throwDateTimeIsNotSupported(name);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToStartOfMinuteImpl;
|
||||
};
|
||||
@ -1010,6 +1045,7 @@ struct ToISOYearImpl
|
||||
{
|
||||
return time_zone.toISOYear(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1066,6 +1102,7 @@ struct ToISOWeekImpl
|
||||
{
|
||||
return time_zone.toISOWeek(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ToISOYearImpl;
|
||||
};
|
||||
@ -1108,6 +1145,7 @@ struct ToRelativeYearNumImpl
|
||||
{
|
||||
return time_zone.toYear(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1139,6 +1177,7 @@ struct ToRelativeQuarterNumImpl
|
||||
{
|
||||
return time_zone.toRelativeQuarterNum(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1170,6 +1209,7 @@ struct ToRelativeMonthNumImpl
|
||||
{
|
||||
return time_zone.toRelativeMonthNum(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1201,6 +1241,7 @@ struct ToRelativeWeekNumImpl
|
||||
{
|
||||
return time_zone.toRelativeWeekNum(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1232,6 +1273,7 @@ struct ToRelativeDayNumImpl
|
||||
{
|
||||
return static_cast<DayNum>(d);
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1269,6 +1311,7 @@ struct ToRelativeHourNumImpl
|
||||
else
|
||||
return static_cast<UInt32>(time_zone.toRelativeHourNum(DayNum(d)));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1300,6 +1343,7 @@ struct ToRelativeMinuteNumImpl
|
||||
{
|
||||
return static_cast<UInt32>(time_zone.toRelativeMinuteNum(DayNum(d)));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1328,6 +1372,7 @@ struct ToRelativeSecondNumImpl
|
||||
{
|
||||
return static_cast<UInt32>(time_zone.fromDayNum(DayNum(d)));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1352,6 +1397,31 @@ struct ToYYYYMMImpl
|
||||
{
|
||||
return time_zone.toNumYYYYMM(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return true; }
|
||||
|
||||
static inline RangeOrNull getPreimage(const IDataType & type, const Field & point)
|
||||
{
|
||||
if (point.getType() != Field::Types::UInt64) return std::nullopt;
|
||||
|
||||
auto year_month = point.get<UInt64>();
|
||||
auto year = year_month / 100;
|
||||
auto month = year_month % 100;
|
||||
|
||||
if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12))
|
||||
return std::nullopt;
|
||||
|
||||
const DateLUTImpl & date_lut = DateLUT::instance();
|
||||
|
||||
auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0);
|
||||
auto end_time = date_lut.addMonths(start_time, 1);
|
||||
|
||||
if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type))
|
||||
return {std::make_pair(Field(start_time), Field(end_time))};
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
|
||||
type.getName(), name);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1376,6 +1446,7 @@ struct ToYYYYMMDDImpl
|
||||
{
|
||||
return time_zone.toNumYYYYMMDD(DayNum(d));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
@ -1400,6 +1471,7 @@ struct ToYYYYMMDDhhmmssImpl
|
||||
{
|
||||
return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(DayNum(d)));
|
||||
}
|
||||
static inline constexpr bool hasPreimage() { return false; }
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
@ -7,6 +7,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// See DateTimeTransforms.h
|
||||
@ -83,6 +84,18 @@ public:
|
||||
arguments[0].type->getName(), this->getName());
|
||||
}
|
||||
|
||||
bool hasInformationAboutPreimage() const override { return Transform::hasPreimage(); }
|
||||
|
||||
RangeOrNull getPreimage(const IDataType & type, const Field & point) const override
|
||||
{
|
||||
if constexpr (Transform::hasPreimage())
|
||||
return Transform::getPreimage(type, point);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Function {} has no information about its preimage",
|
||||
Transform::name);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Core/IResolvedFunction.h>
|
||||
#include <Common/Exception.h>
|
||||
@ -30,7 +31,8 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
class Field;
|
||||
/// A left-closed and right-open interval representing the preimage of a function.
|
||||
using RangeOrNull = std::optional<std::pair<Field, Field>>;
|
||||
|
||||
/// The simplest executable object.
|
||||
/// Motivation:
|
||||
@ -230,6 +232,12 @@ public:
|
||||
*/
|
||||
virtual bool hasInformationAboutMonotonicity() const { return false; }
|
||||
|
||||
/** Lets you know if the function has its definition of preimage.
|
||||
* This is used to work with predicate optimizations, where the comparison between
|
||||
* f(x) and a constant c could be converted to the comparison between x and f's preimage [b, e).
|
||||
*/
|
||||
virtual bool hasInformationAboutPreimage() const { return false; }
|
||||
|
||||
struct ShortCircuitSettings
|
||||
{
|
||||
/// Should we enable lazy execution for the first argument of short-circuit function?
|
||||
@ -283,6 +291,14 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName());
|
||||
}
|
||||
|
||||
/** Get the preimage of a function in the form of a left-closed and right-open interval. Call only if hasInformationAboutPreimage.
|
||||
* std::nullopt might be returned if the point (a single value) is invalid for this function.
|
||||
*/
|
||||
virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName());
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
|
||||
@ -472,12 +488,17 @@ public:
|
||||
virtual bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const = 0;
|
||||
|
||||
virtual bool hasInformationAboutMonotonicity() const { return false; }
|
||||
virtual bool hasInformationAboutPreimage() const { return false; }
|
||||
|
||||
using Monotonicity = IFunctionBase::Monotonicity;
|
||||
virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName());
|
||||
}
|
||||
virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName());
|
||||
}
|
||||
|
||||
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
|
||||
virtual size_t getNumberOfArguments() const = 0;
|
||||
|
@ -90,10 +90,17 @@ public:
|
||||
|
||||
bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); }
|
||||
|
||||
bool hasInformationAboutPreimage() const override { return function->hasInformationAboutPreimage(); }
|
||||
|
||||
Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
|
||||
{
|
||||
return function->getMonotonicityForRange(type, left, right);
|
||||
}
|
||||
|
||||
RangeOrNull getPreimage(const IDataType & type, const Field & point) const override
|
||||
{
|
||||
return function->getPreimage(type, point);
|
||||
}
|
||||
private:
|
||||
std::shared_ptr<IFunction> function;
|
||||
DataTypes arguments;
|
||||
|
@ -0,0 +1,199 @@
|
||||
#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>
|
||||
|
||||
#include <Core/Field.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Common/DateLUT.h>
|
||||
#include <Common/DateLUTImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/** Given a monotonic non-decreasing function f(x), which satisfies f(x) = c for any value x within [b, e).
|
||||
* We could convert it into its equivalent form, x >= b AND x < e, which is free from the invocation of the function.
|
||||
* And we could apply the similar transformation to other comparisons. The suggested transformations list:
|
||||
*
|
||||
* f(x) == c -> x >= b AND x < e
|
||||
* f(x) != c -> x < b OR x >= e
|
||||
* f(x) > c -> x >= e
|
||||
* f(x) >= c -> x >= b
|
||||
* f(x) < c -> x < b
|
||||
* f(x) <= c -> x < e
|
||||
*
|
||||
* This function generates a new AST with the transformed relation.
|
||||
*/
|
||||
ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair<Field, Field>& range)
|
||||
{
|
||||
const DateLUTImpl & date_lut = DateLUT::instance();
|
||||
|
||||
const String & column_name = column.name;
|
||||
String start_date_or_date_time;
|
||||
String end_date_or_date_time;
|
||||
|
||||
if (isDateOrDate32(column.type.get()))
|
||||
{
|
||||
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
|
||||
}
|
||||
else if (isDateTime(column.type.get()) || isDateTime64(column.type.get()))
|
||||
{
|
||||
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
|
||||
}
|
||||
else [[unlikely]] return {};
|
||||
|
||||
if (comparator == "equals")
|
||||
{
|
||||
return makeASTFunction("and",
|
||||
makeASTFunction("greaterOrEquals",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(start_date_or_date_time)
|
||||
),
|
||||
makeASTFunction("less",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(end_date_or_date_time)
|
||||
)
|
||||
);
|
||||
}
|
||||
else if (comparator == "notEquals")
|
||||
{
|
||||
return makeASTFunction("or",
|
||||
makeASTFunction("less",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(start_date_or_date_time)
|
||||
),
|
||||
makeASTFunction("greaterOrEquals",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(end_date_or_date_time)
|
||||
)
|
||||
);
|
||||
}
|
||||
else if (comparator == "greater")
|
||||
{
|
||||
return makeASTFunction("greaterOrEquals",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(end_date_or_date_time)
|
||||
);
|
||||
}
|
||||
else if (comparator == "lessOrEquals")
|
||||
{
|
||||
return makeASTFunction("less",
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(end_date_or_date_time)
|
||||
);
|
||||
}
|
||||
else if (comparator == "less" || comparator == "greaterOrEquals")
|
||||
{
|
||||
return makeASTFunction(comparator,
|
||||
std::make_shared<ASTIdentifier>(column_name),
|
||||
std::make_shared<ASTLiteral>(start_date_or_date_time)
|
||||
);
|
||||
}
|
||||
else [[unlikely]]
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}",
|
||||
comparator);
|
||||
}
|
||||
}
|
||||
|
||||
void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction & function, ASTPtr & ast, const Data & data)
|
||||
{
|
||||
const static std::unordered_map<String, String> swap_relations = {
|
||||
{"equals", "equals"},
|
||||
{"notEquals", "notEquals"},
|
||||
{"less", "greater"},
|
||||
{"greater", "less"},
|
||||
{"lessOrEquals", "greaterOrEquals"},
|
||||
{"greaterOrEquals", "lessOrEquals"},
|
||||
};
|
||||
|
||||
if (!swap_relations.contains(function.name)) return;
|
||||
|
||||
if (!function.arguments || function.arguments->children.size() != 2) return;
|
||||
|
||||
size_t func_id = function.arguments->children.size();
|
||||
|
||||
for (size_t i = 0; i < function.arguments->children.size(); i++)
|
||||
{
|
||||
if (const auto * func = function.arguments->children[i]->as<ASTFunction>())
|
||||
{
|
||||
func_id = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (func_id == function.arguments->children.size()) return;
|
||||
|
||||
size_t literal_id = 1 - func_id;
|
||||
const auto * literal = function.arguments->children[literal_id]->as<ASTLiteral>();
|
||||
|
||||
if (!literal || literal->value.getType() != Field::Types::UInt64) return;
|
||||
|
||||
String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name);
|
||||
|
||||
const auto * ast_func = function.arguments->children[func_id]->as<ASTFunction>();
|
||||
/// Currently we only handle single-argument functions.
|
||||
if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return;
|
||||
|
||||
const auto * column_id = ast_func->arguments->children.at(0)->as<ASTIdentifier>();
|
||||
if (!column_id) return;
|
||||
|
||||
auto pos = IdentifierSemantic::getMembership(*column_id);
|
||||
if (!pos)
|
||||
pos = IdentifierSemantic::chooseTableColumnMatch(*column_id, data.tables, true);
|
||||
if (!pos)
|
||||
return;
|
||||
|
||||
if (*pos >= data.tables.size())
|
||||
return;
|
||||
|
||||
auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName());
|
||||
if (!data_type_and_name) return;
|
||||
|
||||
const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context);
|
||||
if (!converter) return;
|
||||
|
||||
ColumnsWithTypeAndName args;
|
||||
args.emplace_back(data_type_and_name->type, "tmp");
|
||||
auto converter_base = converter->build(args);
|
||||
if (!converter_base || !converter_base->hasInformationAboutPreimage()) return;
|
||||
|
||||
auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value);
|
||||
if (!preimage_range) return;
|
||||
|
||||
const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range);
|
||||
if (!new_ast) return;
|
||||
|
||||
ast = new_ast;
|
||||
}
|
||||
|
||||
bool OptimizeDateOrDateTimeConverterWithPreimageMatcher::needChildVisit(ASTPtr & ast, ASTPtr & /*child*/)
|
||||
{
|
||||
const static std::unordered_set<String> relations = {
|
||||
"equals",
|
||||
"notEquals",
|
||||
"less",
|
||||
"greater",
|
||||
"lessOrEquals",
|
||||
"greaterOrEquals",
|
||||
};
|
||||
|
||||
if (const auto * ast_function = ast->as<ASTFunction>())
|
||||
{
|
||||
return !relations.contains(ast_function->name);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTFunction;
|
||||
|
||||
/** Replace predicate having Date/DateTime converters with their preimages to improve performance.
|
||||
* Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01'
|
||||
* Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'.
|
||||
* The similar optimization also applies to other converters.
|
||||
*/
|
||||
class OptimizeDateOrDateTimeConverterWithPreimageMatcher
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
const TablesWithColumns & tables;
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (const auto * ast_function = ast->as<ASTFunction>())
|
||||
visit(*ast_function, ast, data);
|
||||
}
|
||||
|
||||
static void visit(const ASTFunction & function, ASTPtr & ast, const Data & data);
|
||||
|
||||
static bool needChildVisit(ASTPtr & ast, ASTPtr & child);
|
||||
};
|
||||
|
||||
using OptimizeDateOrDateTimeConverterWithPreimageVisitor = InDepthNodeVisitor<OptimizeDateOrDateTimeConverterWithPreimageMatcher, true>;
|
||||
}
|
@ -25,6 +25,7 @@
|
||||
#include <Interpreters/GatherFunctionQuantileVisitor.h>
|
||||
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
|
||||
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
|
||||
#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
|
||||
RemoveInjectiveFunctionsVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
|
||||
{
|
||||
/// Predicates in HAVING clause has been moved to WHERE clause.
|
||||
if (select_query->where())
|
||||
{
|
||||
OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context};
|
||||
OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refWhere());
|
||||
}
|
||||
if (select_query->prewhere())
|
||||
{
|
||||
OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context};
|
||||
OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refPrewhere());
|
||||
}
|
||||
}
|
||||
|
||||
void transformIfStringsIntoEnum(ASTPtr & query)
|
||||
{
|
||||
std::unordered_set<String> function_names = {"if", "transform"};
|
||||
@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
||||
tables_with_columns, result.storage_snapshot->metadata, result.storage);
|
||||
}
|
||||
|
||||
/// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc.
|
||||
optimizeDateFilters(select_query, tables_with_columns, context);
|
||||
|
||||
/// GROUP BY injective function elimination.
|
||||
optimizeGroupBy(select_query, context);
|
||||
|
||||
|
@ -1,2 +1,54 @@
|
||||
2021-12-31 23:00:00 0
|
||||
2021-12-31 23:00:00 0
|
||||
Date
|
||||
2
|
||||
3
|
||||
2
|
||||
4
|
||||
1
|
||||
3
|
||||
3
|
||||
2
|
||||
1
|
||||
4
|
||||
1
|
||||
4
|
||||
DateTime
|
||||
2
|
||||
3
|
||||
2
|
||||
4
|
||||
1
|
||||
3
|
||||
3
|
||||
2
|
||||
1
|
||||
4
|
||||
1
|
||||
4
|
||||
Date32
|
||||
2
|
||||
3
|
||||
2
|
||||
4
|
||||
1
|
||||
3
|
||||
3
|
||||
2
|
||||
1
|
||||
4
|
||||
1
|
||||
4
|
||||
DateTime64
|
||||
2
|
||||
3
|
||||
2
|
||||
4
|
||||
1
|
||||
3
|
||||
3
|
||||
2
|
||||
1
|
||||
4
|
||||
1
|
||||
4
|
||||
|
@ -11,3 +11,79 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0);
|
||||
|
||||
SELECT * FROM source WHERE toYYYYMM(ts) = 202112;
|
||||
SELECT * FROM source WHERE toYear(ts) = 2021;
|
||||
|
||||
DROP TABLE IF EXISTS source;
|
||||
CREATE TABLE source
|
||||
(
|
||||
`dt` Date,
|
||||
`ts` DateTime,
|
||||
`dt_32` Date32,
|
||||
`ts_64` DateTime64(3),
|
||||
`n` Int32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY toYYYYMM(ts)
|
||||
ORDER BY tuple();
|
||||
|
||||
INSERT INTO source values ('2022-12-31', '2022-12-31 23:59:59', '2022-12-31', '2022-12-31 23:59:59.123', 0);
|
||||
INSERT INTO source values ('2023-01-01', '2023-01-01 00:00:00', '2023-01-01', '2023-01-01 00:00:00.000', 1);
|
||||
INSERT INTO source values ('2023-12-01', '2023-12-01 00:00:00', '2023-12-01', '2023-12-01 00:00:00.000', 2);
|
||||
INSERT INTO source values ('2023-12-31', '2023-12-31 23:59:59', '2023-12-31', '2023-12-31 23:59:59.123', 3);
|
||||
INSERT INTO source values ('2024-01-01', '2024-01-01 00:00:00', '2024-01-01', '2024-01-01 00:00:00.000', 4);
|
||||
|
||||
SELECT 'Date';
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) = 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) <> 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) < 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) <= 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) > 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt) >= 2023;
|
||||
|
||||
SELECT 'DateTime';
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) = 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) <> 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) < 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) <= 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) > 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts) >= 2023;
|
||||
|
||||
SELECT 'Date32';
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) = 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) < 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) > 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023;
|
||||
|
||||
SELECT 'DateTime64';
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312;
|
||||
SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) = 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) < 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) > 2023;
|
||||
SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023;
|
||||
DROP TABLE source;
|
||||
|
@ -0,0 +1,87 @@
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT
|
||||
value1,
|
||||
toYear(date1) AS year1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')
|
||||
WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date_t
|
||||
WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM datetime_t
|
||||
WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM datetime_t
|
||||
WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date32_t
|
||||
WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM date32_t
|
||||
WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM datetime64_t
|
||||
WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3))
|
||||
SELECT value1
|
||||
FROM datetime64_t
|
||||
WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3))
|
@ -0,0 +1,47 @@
|
||||
DROP TABLE IF EXISTS date_t;
|
||||
CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id;
|
||||
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3;
|
||||
DROP TABLE date_t;
|
||||
|
||||
DROP TABLE IF EXISTS datetime_t;
|
||||
CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id;
|
||||
|
||||
EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3;
|
||||
DROP TABLE datetime_t;
|
||||
|
||||
DROP TABLE IF EXISTS date32_t;
|
||||
CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id;
|
||||
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3;
|
||||
DROP TABLE date32_t;
|
||||
|
||||
DROP TABLE IF EXISTS datetime64_t;
|
||||
CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id;
|
||||
|
||||
EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
|
||||
EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3;
|
||||
DROP TABLE datetime64_t;
|
Loading…
Reference in New Issue
Block a user