more cases for optimize_functions_to_subcolumns

This commit is contained in:
Anton Popov 2024-01-22 19:33:34 +00:00
parent e087f6b67c
commit a89956bb0f
29 changed files with 411 additions and 207 deletions

View File

@ -9,6 +9,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -165,31 +166,17 @@ private:
auto aggregate_function_clone = aggregate_function->clone();
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name);
resolveAggregateFunctionNodeByName(
aggregate_function_clone_typed,
result_aggregate_function_name,
{arithmetic_function_clone_argument->getResultType()});
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName());
resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
return arithmetic_function_clone;
}
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -11,6 +11,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -171,13 +172,13 @@ private:
{
auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context);
if (comparison_function_name == "notEquals")
{
auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context);
result_function = std::move(not_function);
}
@ -197,17 +198,11 @@ private:
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context);
return comparison_function;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
ContextPtr context;
};

View File

@ -9,6 +9,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -77,11 +78,9 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count", {});
}
};

View File

@ -17,6 +17,7 @@
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -178,12 +179,12 @@ private:
}
else if (column_type.isNullable())
{
if (function_name == "isNull" || function_name == "isNotNull")
if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull")
++data.optimized_identifiers_count[qualified_name];
}
else if (column_type.isMap())
{
if (function_name == "mapKeys" || function_name == "mapValues")
if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues")
++data.optimized_identifiers_count[qualified_name];
}
}
@ -192,10 +193,10 @@ private:
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
{
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
const auto & constant_value = second_argument_constant_node->getValue();
const auto & constant_value_type = constant_value.getType();
if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64)
if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64)
++data.optimized_identifiers_count[qualified_name];
}
else if (function_name == "mapContains" && column_type.isMap())
@ -209,6 +210,9 @@ private:
/// Second pass optimizes functions to subcolumns for allowed identifiers.
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
{
private:
std::unordered_set<Identifier> identifiers_to_optimize;
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
using Base::Base;
@ -262,7 +266,7 @@ public:
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "equals");
resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext());
}
else if (function_name == "notEmpty")
{
@ -274,12 +278,27 @@ public:
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "notEquals");
resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext());
}
}
else if (column_type.isNullable())
{
if (function_name == "isNull")
if (function_name == "count")
{
/// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
auto column_node = std::make_shared<ColumnNode>(column, column_source);
auto function_node_not = std::make_shared<FunctionNode>("not");
function_node_not->getArguments().getNodes().push_back(std::move(column_node));
resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext());
function_arguments_nodes = {std::move(function_node_not)};
resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type});
}
else if (function_name == "isNull")
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
column.name += ".null";
@ -295,12 +314,20 @@ public:
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
resolveOrdinaryFunctionNode(*function_node, "not");
resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext());
}
}
else if (column_type.isMap())
{
if (function_name == "mapKeys")
if (function_name == "length")
{
/// Replace `length(map_argument)` with `map_argument.size0`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapKeys")
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
column.name += ".keys";
@ -364,19 +391,10 @@ public:
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNode(*function_node, "has");
resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext());
}
}
}
private:
std::unordered_set<Identifier> identifiers_to_optimize;
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
}

View File

@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Interpreters/Context.h>
namespace DB
@ -41,25 +42,17 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count", {});
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count", {});
}
}
private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -6,6 +6,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>
@ -140,16 +141,16 @@ private:
const auto lhs = std::make_shared<FunctionNode>("greaterOrEquals");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext());
const auto rhs = std::make_shared<FunctionNode>("less");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
const auto new_date_filter = std::make_shared<FunctionNode>("and");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext());
return new_date_filter;
}
@ -158,16 +159,16 @@ private:
const auto lhs = std::make_shared<FunctionNode>("less");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext());
const auto rhs = std::make_shared<FunctionNode>("greaterOrEquals");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
const auto new_date_filter = std::make_shared<FunctionNode>("or");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext());
return new_date_filter;
}
@ -176,7 +177,7 @@ private:
const auto new_date_filter = std::make_shared<FunctionNode>("greaterOrEquals");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext());
return new_date_filter;
}
@ -185,7 +186,7 @@ private:
const auto new_date_filter = std::make_shared<FunctionNode>("less");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext());
return new_date_filter;
}
@ -194,7 +195,7 @@ private:
const auto new_date_filter = std::make_shared<FunctionNode>(comparator);
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext());
return new_date_filter;
}
@ -205,12 +206,6 @@ private:
comparator);
}
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
}

View File

@ -13,6 +13,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -58,8 +59,7 @@ public:
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(if_arguments_nodes[0]);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes);
}
}
else if (first_const_node)
@ -79,30 +79,21 @@ public:
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[2]);
function_arguments_nodes[1] = std::move(not_function);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes);
}
}
}
private:
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments)
{
auto result_type = function_node.getResultType();
auto suffix = result_type->isNullable() ? "OrNullIf" : "If";
std::string suffix = "If";
if (result_type->isNullable())
suffix = "OrNullIf";
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
resolveAggregateFunctionNodeByName(
function_node,
function_node.getFunctionName() + suffix,
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
{arguments[0]->getResultType(), arguments[1]->getResultType()});
}
};

View File

@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h>
namespace DB
@ -77,50 +78,30 @@ public:
const auto lhs = std::make_shared<FunctionNode>("sum");
lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*lhs, column_type);
resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type});
const auto rhs_count = std::make_shared<FunctionNode>("count");
rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*rhs_count, column_type);
resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type});
const auto rhs = std::make_shared<FunctionNode>("multiply");
rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]);
rhs->getArguments().getNodes().push_back(rhs_count);
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
const auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
if (column_id == 0)
new_node->getArguments().getNodes() = {lhs, rhs};
else if (column_id == 1)
new_node->getArguments().getNodes() = {rhs, lhs};
resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext());
if (!new_node)
return;
node = new_node;
}
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
const auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(),
NullsAction::EMPTY,
{argument_type},
{},
properties);
function_node.resolveAsAggregateFunction(aggregate_function);
}
};
}

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h>
@ -65,7 +66,8 @@ public:
auto multiplier_node = function_node_arguments_nodes[0];
function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()});
if (constant_value_literal.get<UInt64>() != 1)
{
@ -113,7 +115,7 @@ public:
function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()});
if (if_true_condition_value != 1)
{
@ -142,7 +144,7 @@ public:
function_node_arguments_nodes[0] = std::move(not_function);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()});
if (if_false_condition_value != 1)
{
@ -154,14 +156,6 @@ public:
}
private:
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
"countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
{

View File

@ -7,6 +7,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
@ -75,15 +76,7 @@ public:
for (const auto & function_node_argument : function_node_argument_nodes)
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types,
function_node->getAggregateFunction()->getParameters(),
properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types);
}
};

View File

@ -7,6 +7,7 @@
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -175,11 +176,8 @@ public:
/// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
resolveAggregateFunctionNodeByName(*function_node, "count", {});
}
}
};

View File

@ -685,4 +685,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node;
}
/// Re-resolves `function_node` as the ordinary function called `function_name`.
/// The function is looked up in FunctionFactory using `context` and built for the
/// argument columns currently attached to the node.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
{
    /// Lookup happens first, so the argument columns are only requested
    /// once the factory has returned a resolver for the name.
    auto overload_resolver = FunctionFactory::instance().get(function_name, context);
    auto resolved_function = overload_resolver->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(resolved_function));
}
/// Resolves `function_node` as the aggregate function called `function_name`
/// for the given `argument_types`.
///
/// The nulls action is taken from the node. Parameters are inherited from the
/// aggregate function the node is currently resolved to, if any.
///
/// Some callers pass a node that was just constructed by name and has never been
/// resolved (e.g. a fresh `sum` / `count` node created while rewriting
/// `sum(column + literal)`). Such a node has no previous aggregate function to take
/// parameters from, so an empty parameter list is used instead of dereferencing it.
/// NOTE(review): this relies on getAggregateFunction() returning a null pointer for
/// a node that is not resolved as an aggregate function — confirm against FunctionNode.h.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types)
{
    auto previous_aggregate_function = function_node.getAggregateFunction();

    AggregateFunctionProperties properties;
    auto & factory = AggregateFunctionFactory::instance();

    /// Two explicit calls keep the "no previous function" case identical to the
    /// per-pass helpers this function replaced, which passed `{}` as parameters.
    auto aggregate_function = previous_aggregate_function
        ? factory.get(
            function_name,
            function_node.getNullsAction(),
            argument_types,
            previous_aggregate_function->getParameters(),
            properties)
        : factory.get(
            function_name,
            function_node.getNullsAction(),
            argument_types,
            {},
            properties);

    function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
}

View File

@ -102,4 +102,12 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Resolves function node as ordinary function with given name.
/// Arguments are taken from the node; the function is looked up using the given context.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context);
/// Resolves function node as aggregate function with given name.
/// Parameters and nulls action are taken from the node; argument types are passed explicitly.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types);
}

View File

@ -43,6 +43,7 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
}
namespace
@ -170,6 +171,7 @@ struct QueryASTSettings
struct QueryTreeSettings
{
bool run_passes = true;
bool dump_tree = true;
bool dump_passes = false;
bool dump_ast = false;
Int64 passes = -1;
@ -179,6 +181,7 @@ struct QueryTreeSettings
std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"run_passes", run_passes},
{"dump_tree", dump_tree},
{"dump_passes", dump_passes},
{"dump_ast", dump_ast}
};
@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query");
auto settings = checkAndGetSettings<QueryTreeSettings>(ast.getSettings());
if (!settings.dump_tree && !settings.dump_ast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query");
auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext());
bool need_newline = false;
if (settings.run_passes)
{
@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
if (settings.dump_passes)
{
query_tree_pass_manager.dump(buf, pass_index);
if (pass_index > 0)
buf << '\n';
need_newline = true;
}
query_tree_pass_manager.run(query_tree, pass_index);
}
if (settings.dump_tree)
{
if (need_newline)
buf << "\n\n";
query_tree->dumpTree(buf);
}
else
{
query_tree->dumpTree(buf);
need_newline = true;
}
if (settings.dump_ast)
{
buf << '\n';
buf << '\n';
if (need_newline)
buf << "\n\n";
query_tree->toAST()->format(IAST::FormatSettings(buf, false));
}

View File

@ -40,27 +40,16 @@ ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const S
return makeASTFunction("sum", makeASTFunction("not", ast));
}
ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg)
const std::unordered_map<String, std::tuple<std::set<TypeIndex>, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn =
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("has", ast, arg);
}
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn =
{
{"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
{"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},
{"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}},
{"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}},
{"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}},
{"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}},
{"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}},
{"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
};
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformMapContainsToSubcolumn)>> binary_function_to_subcolumn
{
{"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}},
{"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}},
{"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}},
{"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}},
{"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}},
{"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}},
{"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}},
{"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}},
{"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}},
};
std::optional<NameAndTypePair> getColumnFromArgumentsToOptimize(
@ -116,10 +105,14 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct
if (arguments.size() == 1)
{
auto it = unary_function_to_subcolumn.find(function.name);
if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id)
if (it == unary_function_to_subcolumn.end())
return;
const auto & expected_types_id = std::get<0>(it->second);
if (expected_types_id.contains(column_type_id))
++data.optimized_identifiers_count[column->name];
}
else
else if (arguments.size() == 2)
{
if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple)
{
@ -131,11 +124,9 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct
if (value_type == Field::Types::UInt64 || value_type == Field::Types::String)
++data.optimized_identifiers_count[column->name];
}
else
else if (function.name == "mapContains" && column_type_id == TypeIndex::Map)
{
auto it = binary_function_to_subcolumn.find(function.name);
if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id)
++data.optimized_identifiers_count[column->name];
++data.optimized_identifiers_count[column->name];
}
}
}
@ -148,7 +139,7 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST
return;
auto column_type_id = column->type->getTypeId();
const auto & alias = function.tryGetAlias();
auto alias = function.getAliasOrColumnName();
if (arguments.size() == 1)
{
@ -156,8 +147,8 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST
if (it == unary_function_to_subcolumn.end())
return;
const auto & [expected_type_id, subcolumn_name, transformer] = it->second;
if (column_type_id != expected_type_id)
const auto & [expected_types_id, subcolumn_name, transformer] = it->second;
if (!expected_types_id.contains(column_type_id))
return;
ast = transformer(column->name, subcolumn_name);
@ -191,17 +182,10 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST
ast = transformToSubcolumn(column->name, subcolumn_name);
ast->setAlias(alias);
}
else
else if (function.name == "mapContains" && column_type_id == TypeIndex::Map)
{
auto it = binary_function_to_subcolumn.find(function.name);
if (it == binary_function_to_subcolumn.end())
return;
const auto & [expected_type_id, subcolumn_name, transformer] = it->second;
if (column_type_id != expected_type_id)
return;
ast = transformer(column->name, subcolumn_name, arguments[1]);
auto subcolumn = transformToSubcolumn(column->name, "keys");
ast = makeASTFunction("has", subcolumn, arguments[1]);
ast->setAlias(alias);
}
}

View File

@ -2,25 +2,25 @@
0 1 0
SELECT
id IS NULL,
`n.null`,
NOT `n.null`
`n.null` AS `isNull(n)`,
NOT `n.null` AS `isNotNull(n)`
FROM t_func_to_subcolumns
3 0 1 0
0 1 0 \N
SELECT
`arr.size0`,
`arr.size0` = 0,
`arr.size0` != 0,
`arr.size0` AS `length(arr)`,
`arr.size0` = 0 AS `empty(arr)`,
`arr.size0` != 0 AS `notEmpty(arr)`,
empty(n)
FROM t_func_to_subcolumns
['foo','bar'] [1,2]
[] []
SELECT
`m.keys`,
`m.values`
`m.keys` AS `mapKeys(m)`,
`m.values` AS `mapValues(m)`
FROM t_func_to_subcolumns
1
SELECT sum(NOT `n.null`)
SELECT sum(NOT `n.null`) AS `count(n)`
FROM t_func_to_subcolumns
2
SELECT count(id)
@ -30,7 +30,7 @@ FROM t_func_to_subcolumns
3 0 0
SELECT
id,
`n.null`,
`n.null` AS `isNull(n)`,
right.n IS NULL
FROM t_func_to_subcolumns AS left
ALL FULL OUTER JOIN

View File

@ -1,6 +1,5 @@
DROP TABLE IF EXISTS t_func_to_subcolumns;
SET allow_experimental_map_type = 1;
SET optimize_functions_to_subcolumns = 1;
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))

View File

@ -0,0 +1,50 @@
0 0 1
0 1 0
SELECT
__table1.id IS NULL AS `isNull(id)`,
__table1.`n.null` AS `isNull(n)`,
NOT __table1.`n.null` AS `isNotNull(n)`
FROM default.t_func_to_subcolumns AS __table1
3 0 1 0
0 1 0 \N
SELECT
__table1.`arr.size0` AS `length(arr)`,
__table1.`arr.size0` = 0 AS `empty(arr)`,
__table1.`arr.size0` != 0 AS `notEmpty(arr)`,
empty(__table1.n) AS `empty(n)`
FROM default.t_func_to_subcolumns AS __table1
['foo','bar'] [1,2]
[] []
SELECT
__table1.`m.keys` AS `mapKeys(m)`,
__table1.`m.values` AS `mapValues(m)`
FROM default.t_func_to_subcolumns AS __table1
1
SELECT sum(NOT __table1.`n.null`) AS `count(n)`
FROM default.t_func_to_subcolumns AS __table1
2
SELECT count(__table1.id) AS `count(id)`
FROM default.t_func_to_subcolumns AS __table1
1 0 0
2 1 0
3 0 0
SELECT
__table1.id AS id,
__table1.`n.null` AS `isNull(n)`,
__table2.n IS NULL AS `isNull(right.n)`
FROM default.t_func_to_subcolumns AS __table1
ALL FULL OUTER JOIN
(
SELECT
1 AS id,
\'qqq\' AS n
FROM system.one AS __table4
UNION ALL
SELECT
3 AS id,
\'www\' AS `\'www\'`
FROM system.one AS __table6
) AS __table2 USING (id)
0 10
0 20

View File

@ -0,0 +1,42 @@
-- Exercises the optimize_functions_to_subcolumns setting with the experimental analyzer enabled.
-- Every SELECT below is paired with an EXPLAIN QUERY TREE (dump_tree = 0, dump_ast = 1) of the
-- same query, so the test output contains both the query result and the dumped query AST.
DROP TABLE IF EXISTS t_func_to_subcolumns;
SET allow_experimental_analyzer = 1;
SET optimize_functions_to_subcolumns = 1;
-- One plain column (id) plus one Array, one Nullable and one Map column.
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
ENGINE = MergeTree ORDER BY tuple();
-- Row 1 holds non-empty values in every column; row 2 holds an empty array, NULL and an empty map.
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map());
-- IS NULL / IS NOT NULL on a non-Nullable column (id) and on the Nullable column (n).
SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
-- length / empty / notEmpty on the Array column; empty(n) is applied to the Nullable(String) column.
SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
-- mapKeys / mapValues on the Map column.
SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
-- count() over the Nullable column.
SELECT count(n) FROM t_func_to_subcolumns;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns;
-- count() over the non-Nullable column.
SELECT count(id) FROM t_func_to_subcolumns;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns;
-- IS NULL on both sides of a FULL JOIN: left.n is a table column, right.n comes from a subquery.
-- Note that the second UNION ALL branch selects 'www' without the alias n.
SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
DROP TABLE t_func_to_subcolumns;
-- Second scenario: a Tuple whose only element is literally named `null`.
-- The query selects both `t IS NULL` and the tuple element `t.null`.
-- NOTE(review): presumably guards against confusing the tuple element `t.null` with the
-- `.null` subcolumn used for Nullable columns - confirm against the optimization code.
DROP TABLE IF EXISTS t_tuple_null;
CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_tuple_null VALUES ((10)), ((20));
SELECT t IS NULL, t.null FROM t_tuple_null;
DROP TABLE t_tuple_null;

View File

@ -1,4 +1,4 @@
SELECT has(`m.keys`, \'a\')
SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')`
FROM t_map_contains
1
0

View File

@ -0,0 +1,4 @@
SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')`
FROM default.t_map_contains AS __table1
1
0

View File

@ -0,0 +1,13 @@
-- Exercises mapContains() on a Map column with optimize_functions_to_subcolumns and the
-- experimental analyzer both enabled.
DROP TABLE IF EXISTS t_map_contains;
CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory;
-- Only the first row's map has the key 'a'.
INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4));
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 1;
-- Dump the query AST first, then run the same query to get the actual result.
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains;
SELECT mapContains(m, 'a') FROM t_map_contains;
DROP TABLE t_map_contains;

View File

@ -1,17 +1,17 @@
1
SELECT `t1.a`
SELECT `t1.a` AS `tupleElement(t1, 1)`
FROM t_tuple_element
a
SELECT `t1.s`
SELECT `t1.s` AS `tupleElement(t1, 2)`
FROM t_tuple_element
1
SELECT `t1.a`
SELECT `t1.a` AS `tupleElement(t1, \'a\')`
FROM t_tuple_element
2
SELECT `t2.1`
SELECT `t2.1` AS `tupleElement(t2, 1)`
FROM t_tuple_element
2
SELECT `t2.1`
SELECT `t2.1` AS `tupleElement(t2, 1)`
FROM t_tuple_element
1 2
WITH (1, 2) AS t

View File

@ -0,0 +1,25 @@
1
SELECT __table1.`t1.a` AS `tupleElement(t1, 1)`
FROM default.t_tuple_element AS __table1
a
SELECT __table1.`t1.s` AS `tupleElement(t1, 2)`
FROM default.t_tuple_element AS __table1
1
SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')`
FROM default.t_tuple_element AS __table1
2
SELECT __table1.`t2.1` AS `tupleElement(t2, 1)`
FROM default.t_tuple_element AS __table1
2
SELECT __table1.`t2.1` AS `tupleElement(t2, 1)`
FROM default.t_tuple_element AS __table1
1 2
SELECT
1 AS `tupleElement(t, 1)`,
2 AS `tupleElement(t, 2)`
FROM system.one AS __table1
1 2
SELECT
_CAST(1, \'UInt32\') AS `tupleElement(t, 1)`,
_CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')`
FROM system.one AS __table1

View File

@ -0,0 +1,43 @@
-- Exercises tupleElement() and the `t.N` access syntax with optimize_functions_to_subcolumns
-- and the experimental analyzer both enabled.
-- Valid accesses are followed by an EXPLAIN QUERY TREE (dump_ast = 1) of the same query;
-- invalid accesses carry a `serverError` hint listing the accepted error codes.
DROP TABLE IF EXISTS t_tuple_element;
-- t1 is a named tuple (elements a, s); t2 is an unnamed tuple.
CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory;
INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b'));
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 1;
-- Named tuple: access by position via `t1.1`, by position via tupleElement, and by element name.
SELECT t1.1 FROM t_tuple_element;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element;
SELECT tupleElement(t1, 2) FROM t_tuple_element;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element;
SELECT tupleElement(t1, 'a') FROM t_tuple_element;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element;
-- Invalid calls: first argument is not a tuple, missing second argument, element name that t1
-- does not have, indexes 0 and 3 on a two-element tuple, and a materialize()d second argument.
SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER }
SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK }
SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK }
SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- Unnamed tuple: the same valid and invalid access patterns applied to t2.
SELECT t2.1 FROM t_tuple_element;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element;
SELECT tupleElement(t2, 1) FROM t_tuple_element;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element;
SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER }
SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK }
SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK }
SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
DROP TABLE t_tuple_element;
-- Tuples defined in a WITH clause rather than read from a table: an unnamed constant tuple,
-- then the same tuple cast to a named Tuple type.
WITH (1, 2) AS t SELECT t.1, t.2;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2;
WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b');
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b');

View File

@ -0,0 +1,14 @@
SELECT
`arr.size0` AS `length(arr)`,
`n.null` AS `isNull(n)`
FROM t_column_names
┌─length(arr)─┬─isNull(n)─┐
│ 3 │ 0 │
└─────────────┴───────────┘
SELECT
__table1.`arr.size0` AS `length(arr)`,
__table1.`n.null` AS `isNull(n)`
FROM default.t_column_names AS __table1
┌─length(arr)─┬─isNull(n)─┐
│ 3 │ 0 │
└─────────────┴───────────┘

View File

@ -0,0 +1,19 @@
-- Runs the same pair of queries (length(arr), isNull(n)) with optimize_functions_to_subcolumns
-- enabled, first with the experimental analyzer off and then with it on.
-- NOTE(review): the PrettyCompactNoEscapes format is presumably used so that the result
-- column names appear in the test output - confirm against the reference file.
DROP TABLE IF EXISTS t_column_names;
CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory;
INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo');
-- Pass 1: analyzer disabled, rewritten query shown via EXPLAIN SYNTAX.
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 0;
EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names;
SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes;
-- Pass 2: analyzer enabled, query AST shown via EXPLAIN QUERY TREE.
-- optimize_functions_to_subcolumns is already 1 at this point; the repeated SET is redundant but harmless.
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 1;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names;
SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes;
DROP TABLE t_column_names;

View File

@ -0,0 +1,8 @@
SELECT `m.size0` AS `length(m)`
FROM t_func_to_subcolumns_map
2
1
SELECT __table1.`m.size0` AS `length(m)`
FROM default.t_func_to_subcolumns_map AS __table1
2
1

View File

@ -0,0 +1,19 @@
-- Exercises length() on a Map column with optimize_functions_to_subcolumns enabled,
-- first with the experimental analyzer off and then with it on.
DROP TABLE IF EXISTS t_func_to_subcolumns_map;
CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id;
-- Row 1 has a two-entry map, row 2 a one-entry map.
INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3));
-- Pass 1: analyzer disabled, rewritten query shown via EXPLAIN SYNTAX.
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 0;
EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map;
SELECT length(m) FROM t_func_to_subcolumns_map;
-- Pass 2: analyzer enabled, query AST shown via EXPLAIN QUERY TREE.
-- optimize_functions_to_subcolumns is already 1 at this point; the repeated SET is redundant but harmless.
SET optimize_functions_to_subcolumns = 1;
SET allow_experimental_analyzer = 1;
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map;
SELECT length(m) FROM t_func_to_subcolumns_map;
DROP TABLE t_func_to_subcolumns_map;