2021-11-08 12:44:13 +00:00
|
|
|
#include <algorithm>
|
2022-03-30 10:07:09 +00:00
|
|
|
#include <memory>
|
2022-10-19 00:00:09 +00:00
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Core/Settings.h>
|
|
|
|
#include <Core/NamesAndTypes.h>
|
2022-07-15 14:57:58 +00:00
|
|
|
#include <Core/SettingsEnums.h>
|
2021-09-17 13:51:44 +00:00
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/CollectJoinOnKeysVisitor.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
|
|
|
|
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
|
2021-02-14 11:09:36 +00:00
|
|
|
#include <Interpreters/FunctionNameNormalizer.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/GetAggregatesVisitor.h>
|
|
|
|
#include <Interpreters/GroupingSetsRewriterVisitor.h>
|
|
|
|
#include <Interpreters/LogicalExpressionsOptimizer.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Interpreters/MarkTableIdentifiersVisitor.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/PredicateExpressionsOptimizer.h>
|
|
|
|
#include <Interpreters/QueryAliasesVisitor.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Interpreters/QueryNormalizer.h>
|
|
|
|
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/RewriteOrderByVisitor.hpp>
|
2020-04-07 09:48:47 +00:00
|
|
|
#include <Interpreters/TableJoin.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/TreeOptimizer.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/TreeRewriter.h>
|
2021-11-08 12:44:13 +00:00
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
2022-08-10 15:54:56 +00:00
|
|
|
#include <Interpreters/getTableExpressions.h>
|
|
|
|
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
2022-07-16 20:23:49 +00:00
|
|
|
#include <Interpreters/replaceForPositionalArguments.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2022-09-24 21:24:39 +00:00
|
|
|
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
|
|
|
|
#include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h>
|
|
|
|
|
2022-03-30 10:07:09 +00:00
|
|
|
#include <Parsers/IAST_fwd.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
2021-11-26 18:27:16 +00:00
|
|
|
#include <Parsers/ASTLiteral.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2021-11-26 18:27:16 +00:00
|
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
2022-04-07 05:21:24 +00:00
|
|
|
#include <Parsers/ASTInterpolateElement.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
|
|
|
|
#include <DataTypes/NestedUtils.h>
|
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
2021-11-08 12:44:13 +00:00
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
#include <Storages/IStorage.h>
|
2022-10-19 00:00:09 +00:00
|
|
|
#include <Common/checkStackSize.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-10-18 12:18:31 +00:00
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2022-10-19 00:00:09 +00:00
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2021-02-09 08:21:26 +00:00
|
|
|
extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
|
2019-12-27 19:45:41 +00:00
|
|
|
extern const int EMPTY_NESTED_TABLE;
|
2021-02-09 08:21:26 +00:00
|
|
|
extern const int EXPECTED_ALL_OR_ANY;
|
2019-12-27 19:45:41 +00:00
|
|
|
extern const int INVALID_JOIN_ON_EXPRESSION;
|
2021-02-09 08:21:26 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2019-12-27 19:45:41 +00:00
|
|
|
extern const int NOT_IMPLEMENTED;
|
2021-02-09 08:21:26 +00:00
|
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
2019-12-27 19:45:41 +00:00
|
|
|
extern const int UNKNOWN_IDENTIFIER;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Debug logger type for AST rewriting in this file (instantiated in translateQualifiedNames()).
/// The template argument switches the logging on or off.
using LogAST = DebugASTLog<false>; /// set to true to enable logs
|
|
|
|
|
2022-05-26 07:28:55 +00:00
|
|
|
/// Runs GroupingSetsRewriterVisitor over `query`; the tree is passed by reference to the visitor.
void optimizeGroupingSets(ASTPtr & query)
{
    GroupingSetsRewriterVisitor::Data visitor_data;
    GroupingSetsRewriterVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2020-04-06 13:30:16 +00:00
|
|
|
/// Select implementation of a function based on settings.
|
2019-12-27 19:45:41 +00:00
|
|
|
/// Important that it is done as query rewrite. It means rewritten query
|
|
|
|
/// will be sent to remote servers during distributed query execution,
|
|
|
|
/// and on all remote servers, function implementation will be same.
|
2020-04-06 13:30:16 +00:00
|
|
|
template <char const * func_name>
|
2019-12-27 19:45:41 +00:00
|
|
|
struct CustomizeFunctionsData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
|
|
|
|
2020-04-06 13:30:16 +00:00
|
|
|
const String & customized_func_name;
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-11-10 07:20:50 +00:00
|
|
|
void visit(ASTFunction & func, ASTPtr &) const
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2020-04-06 13:30:16 +00:00
|
|
|
if (Poco::toLower(func.name) == func_name)
|
|
|
|
{
|
|
|
|
func.name = customized_func_name;
|
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-04-06 13:30:16 +00:00
|
|
|
char countdistinct[] = "countdistinct";
|
2020-06-22 14:55:49 +00:00
|
|
|
using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>;
|
|
|
|
|
|
|
|
char countifdistinct[] = "countifdistinct";
|
|
|
|
using CustomizeCountIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countifdistinct>>, true>;
|
2020-04-06 13:30:16 +00:00
|
|
|
|
|
|
|
char in[] = "in";
|
|
|
|
using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>;
|
|
|
|
|
|
|
|
char notIn[] = "notin";
|
|
|
|
using CustomizeNotInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<notIn>>, true>;
|
|
|
|
|
|
|
|
char globalIn[] = "globalin";
|
|
|
|
using CustomizeGlobalInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<globalIn>>, true>;
|
|
|
|
|
|
|
|
char globalNotIn[] = "globalnotin";
|
|
|
|
using CustomizeGlobalNotInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<globalNotIn>>, true>;
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-06-22 14:55:49 +00:00
|
|
|
template <char const * func_suffix>
|
|
|
|
struct CustomizeFunctionsSuffixData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
|
|
|
|
|
|
|
const String & customized_func_suffix;
|
|
|
|
|
2020-11-10 07:20:50 +00:00
|
|
|
void visit(ASTFunction & func, ASTPtr &) const
|
2020-06-22 14:55:49 +00:00
|
|
|
{
|
|
|
|
if (endsWith(Poco::toLower(func.name), func_suffix))
|
|
|
|
{
|
|
|
|
size_t prefix_len = func.name.length() - strlen(func_suffix);
|
|
|
|
func.name = func.name.substr(0, prefix_len) + customized_func_suffix;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Swap 'if' and 'distinct' suffixes to make execution more optimal.
|
|
|
|
char ifDistinct[] = "ifdistinct";
|
|
|
|
using CustomizeIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsSuffixData<ifDistinct>>, true>;
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-10-18 12:18:31 +00:00
|
|
|
/// Used to rewrite all aggregate functions to add -OrNull suffix to them if setting `aggregate_functions_null_for_empty` is set.
|
|
|
|
struct CustomizeAggregateFunctionsSuffixData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
|
|
|
|
|
|
|
const String & customized_func_suffix;
|
|
|
|
|
2020-11-10 07:20:50 +00:00
|
|
|
void visit(ASTFunction & func, ASTPtr &) const
|
2020-10-18 12:18:31 +00:00
|
|
|
{
|
2020-11-04 12:15:31 +00:00
|
|
|
const auto & instance = AggregateFunctionFactory::instance();
|
|
|
|
if (instance.isAggregateFunctionName(func.name) && !endsWith(func.name, customized_func_suffix))
|
2020-10-18 12:18:31 +00:00
|
|
|
{
|
2020-11-04 12:15:31 +00:00
|
|
|
auto properties = instance.tryGetProperties(func.name);
|
|
|
|
if (properties && !properties->returns_default_when_only_null)
|
2020-11-12 15:55:26 +00:00
|
|
|
{
|
2020-12-10 08:26:45 +00:00
|
|
|
func.name += customized_func_suffix;
|
2020-12-09 04:30:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
2020-11-12 15:55:26 +00:00
|
|
|
|
2020-12-24 10:11:07 +00:00
|
|
|
// Used to rewrite aggregate functions with -OrNull suffix in some cases, such as sumIfOrNull, we should rewrite to sumOrNullIf
|
2020-12-09 04:30:38 +00:00
|
|
|
struct CustomizeAggregateFunctionsMoveSuffixData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
2020-11-12 15:55:26 +00:00
|
|
|
|
2020-12-09 04:30:38 +00:00
|
|
|
const String & customized_func_suffix;
|
2020-11-12 15:55:26 +00:00
|
|
|
|
2020-12-09 04:30:38 +00:00
|
|
|
String moveSuffixAhead(const String & name) const
|
|
|
|
{
|
|
|
|
auto prefix = name.substr(0, name.size() - customized_func_suffix.size());
|
2020-11-12 15:55:26 +00:00
|
|
|
|
2020-12-09 04:30:38 +00:00
|
|
|
auto prefix_size = prefix.size();
|
|
|
|
|
|
|
|
if (endsWith(prefix, "MergeState"))
|
|
|
|
return prefix.substr(0, prefix_size - 10) + customized_func_suffix + "MergeState";
|
|
|
|
|
|
|
|
if (endsWith(prefix, "Merge"))
|
|
|
|
return prefix.substr(0, prefix_size - 5) + customized_func_suffix + "Merge";
|
|
|
|
|
|
|
|
if (endsWith(prefix, "State"))
|
|
|
|
return prefix.substr(0, prefix_size - 5) + customized_func_suffix + "State";
|
|
|
|
|
|
|
|
if (endsWith(prefix, "If"))
|
|
|
|
return prefix.substr(0, prefix_size - 2) + customized_func_suffix + "If";
|
|
|
|
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
void visit(ASTFunction & func, ASTPtr &) const
|
|
|
|
{
|
|
|
|
const auto & instance = AggregateFunctionFactory::instance();
|
|
|
|
if (instance.isAggregateFunctionName(func.name))
|
|
|
|
{
|
2020-12-10 08:26:45 +00:00
|
|
|
if (endsWith(func.name, customized_func_suffix))
|
2020-12-09 04:30:38 +00:00
|
|
|
{
|
2020-12-09 07:24:36 +00:00
|
|
|
auto properties = instance.tryGetProperties(func.name);
|
|
|
|
if (properties && !properties->returns_default_when_only_null)
|
|
|
|
{
|
|
|
|
func.name = moveSuffixAhead(func.name);
|
|
|
|
}
|
2020-11-12 15:55:26 +00:00
|
|
|
}
|
2020-10-18 12:18:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
struct FuseSumCountAggregates
|
2021-03-01 10:04:34 +00:00
|
|
|
{
|
2021-03-12 07:29:38 +00:00
|
|
|
std::vector<ASTFunction *> sums {};
|
|
|
|
std::vector<ASTFunction *> counts {};
|
|
|
|
std::vector<ASTFunction *> avgs {};
|
2021-03-01 10:04:34 +00:00
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
void addFuncNode(ASTFunction * func)
|
2021-03-08 03:58:18 +00:00
|
|
|
{
|
2021-04-15 16:40:49 +00:00
|
|
|
if (func->name == "sum")
|
|
|
|
sums.push_back(func);
|
|
|
|
else if (func->name == "count")
|
|
|
|
counts.push_back(func);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
assert(func->name == "avg");
|
|
|
|
avgs.push_back(func);
|
|
|
|
}
|
2021-03-08 03:58:18 +00:00
|
|
|
}
|
2021-03-01 10:04:34 +00:00
|
|
|
|
2021-03-08 03:58:18 +00:00
|
|
|
bool canBeFused() const
|
2021-03-01 10:04:34 +00:00
|
|
|
{
|
2021-04-15 16:40:49 +00:00
|
|
|
// Need at least two different kinds of functions to fuse.
|
2021-03-08 03:58:18 +00:00
|
|
|
if (sums.empty() && counts.empty())
|
|
|
|
return false;
|
|
|
|
if (sums.empty() && avgs.empty())
|
|
|
|
return false;
|
|
|
|
if (counts.empty() && avgs.empty())
|
|
|
|
return false;
|
|
|
|
return true;
|
2021-03-01 10:04:34 +00:00
|
|
|
}
|
2021-03-08 03:58:18 +00:00
|
|
|
};
|
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
struct FuseSumCountAggregatesVisitorData
|
2021-03-08 03:58:18 +00:00
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
std::unordered_map<String, FuseSumCountAggregates> fuse_map;
|
2021-03-01 10:04:34 +00:00
|
|
|
|
2021-03-12 07:29:38 +00:00
|
|
|
void visit(ASTFunction & func, ASTPtr &)
|
2021-03-01 10:04:34 +00:00
|
|
|
{
|
|
|
|
if (func.name == "sum" || func.name == "avg" || func.name == "count")
|
|
|
|
{
|
2021-03-09 09:03:24 +00:00
|
|
|
if (func.arguments->children.empty())
|
2021-03-08 03:58:18 +00:00
|
|
|
return;
|
|
|
|
|
2021-04-16 10:35:02 +00:00
|
|
|
// Probably we can extend it to match count() for non-nullable argument
|
2021-04-15 16:40:49 +00:00
|
|
|
// to sum/avg with any other argument. Now we require strict match.
|
|
|
|
const auto argument = func.arguments->children.at(0)->getColumnName();
|
|
|
|
auto it = fuse_map.find(argument);
|
2021-03-12 07:29:38 +00:00
|
|
|
if (it != fuse_map.end())
|
2021-03-01 10:04:34 +00:00
|
|
|
{
|
2021-04-15 16:40:49 +00:00
|
|
|
it->second.addFuncNode(&func);
|
2021-03-12 07:29:38 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-04-15 16:40:49 +00:00
|
|
|
FuseSumCountAggregates funcs{};
|
|
|
|
funcs.addFuncNode(&func);
|
|
|
|
fuse_map[argument] = funcs;
|
2021-03-01 10:04:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-10-18 12:18:31 +00:00
|
|
|
/// Visitors built on top of the ASTFunction-visiting data structs defined above.

/// Uses CustomizeAggregateFunctionsSuffixData (appends the suffix to aggregate functions).
using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsSuffixData>, true>;

/// Uses CustomizeAggregateFunctionsMoveSuffixData (moves the suffix ahead of trailing combinators).
using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsMoveSuffixData>, true>;

/// Uses FuseSumCountAggregatesVisitorData (collects sum / count / avg grouped by argument).
using FuseSumCountAggregatesVisitor = InDepthNodeVisitor<OneTypeMatcher<FuseSumCountAggregatesVisitorData>, true>;
|
2020-10-18 12:18:31 +00:00
|
|
|
|
2021-10-04 13:08:41 +00:00
|
|
|
|
|
|
|
struct ExistsExpressionData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTFunction;
|
|
|
|
|
2021-10-05 08:25:50 +00:00
|
|
|
static void visit(ASTFunction & func, ASTPtr)
|
2021-10-04 13:08:41 +00:00
|
|
|
{
|
|
|
|
bool exists_expression = func.name == "exists"
|
|
|
|
&& func.arguments && func.arguments->children.size() == 1
|
|
|
|
&& typeid_cast<const ASTSubquery *>(func.arguments->children[0].get());
|
|
|
|
|
|
|
|
if (!exists_expression)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/// EXISTS(subquery) --> 1 IN (SELECT 1 FROM subquery LIMIT 1)
|
|
|
|
|
|
|
|
auto subquery_node = func.arguments->children[0];
|
|
|
|
auto table_expression = std::make_shared<ASTTableExpression>();
|
|
|
|
table_expression->subquery = std::move(subquery_node);
|
|
|
|
table_expression->children.push_back(table_expression->subquery);
|
|
|
|
|
|
|
|
auto tables_in_select_element = std::make_shared<ASTTablesInSelectQueryElement>();
|
|
|
|
tables_in_select_element->table_expression = std::move(table_expression);
|
|
|
|
tables_in_select_element->children.push_back(tables_in_select_element->table_expression);
|
|
|
|
|
|
|
|
auto tables_in_select = std::make_shared<ASTTablesInSelectQuery>();
|
|
|
|
tables_in_select->children.push_back(std::move(tables_in_select_element));
|
|
|
|
|
|
|
|
auto select_expr_list = std::make_shared<ASTExpressionList>();
|
|
|
|
select_expr_list->children.push_back(std::make_shared<ASTLiteral>(1u));
|
|
|
|
|
|
|
|
auto select_query = std::make_shared<ASTSelectQuery>();
|
|
|
|
select_query->children.push_back(select_expr_list);
|
|
|
|
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::SELECT, select_expr_list);
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::TABLES, tables_in_select);
|
|
|
|
|
2022-04-18 08:18:31 +00:00
|
|
|
ASTPtr limit_length_ast = std::make_shared<ASTLiteral>(Field(static_cast<UInt64>(1)));
|
2021-10-04 13:08:41 +00:00
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length_ast));
|
|
|
|
|
|
|
|
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
|
|
|
|
select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
|
|
|
|
select_with_union_query->list_of_selects->children.push_back(std::move(select_query));
|
|
|
|
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
|
|
|
|
|
|
|
|
auto new_subquery = std::make_shared<ASTSubquery>();
|
|
|
|
new_subquery->children.push_back(select_with_union_query);
|
|
|
|
|
|
|
|
auto function = makeASTFunction("in", std::make_shared<ASTLiteral>(1u), new_subquery);
|
|
|
|
func = *function;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Visitor that applies ExistsExpressionData::visit (the EXISTS(subquery) -> 1 IN (...) rewrite) to ASTFunction nodes.
/// NOTE(review): the second template argument differs from the other visitors in this file (false instead of true);
/// presumably it selects the traversal order - confirm against InDepthNodeVisitor.
using ExistsExpressionVisitor = InDepthNodeVisitor<OneTypeMatcher<ExistsExpressionData>, false>;
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
|
|
|
|
/// Expand asterisks and qualified asterisks with column names.
|
|
|
|
/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer.
|
|
|
|
void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query, const NameSet & source_columns_set,
|
2020-06-05 21:17:00 +00:00
|
|
|
const TablesWithColumns & tables_with_columns)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
|
|
|
LogAST log;
|
2020-02-27 13:31:32 +00:00
|
|
|
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns);
|
2019-12-27 19:45:41 +00:00
|
|
|
TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream());
|
|
|
|
visitor.visit(query);
|
|
|
|
|
|
|
|
/// This may happen after expansion of COLUMNS('regexp').
|
|
|
|
if (select_query.select()->children.empty())
|
|
|
|
throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
|
|
|
|
}
|
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
// Replaces one avg/sum/count function with an appropriate expression with
|
|
|
|
// sumCount().
|
|
|
|
void replaceWithSumCount(String column_name, ASTFunction & func)
|
2021-03-08 03:58:18 +00:00
|
|
|
{
|
2021-03-12 07:29:38 +00:00
|
|
|
auto func_base = makeASTFunction("sumCount", std::make_shared<ASTIdentifier>(column_name));
|
|
|
|
auto exp_list = std::make_shared<ASTExpressionList>();
|
|
|
|
if (func.name == "sum" || func.name == "count")
|
2021-03-08 03:58:18 +00:00
|
|
|
{
|
2021-03-12 07:29:38 +00:00
|
|
|
/// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2
|
|
|
|
UInt8 idx = (func.name == "sum" ? 1 : 2);
|
|
|
|
func.name = "tupleElement";
|
|
|
|
exp_list->children.push_back(func_base);
|
|
|
|
exp_list->children.push_back(std::make_shared<ASTLiteral>(idx));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/// Rewrite "avg" to sumCount().1 / sumCount().2
|
|
|
|
auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
|
2022-03-28 11:26:42 +00:00
|
|
|
auto new_arg2 = makeASTFunction("CAST",
|
2022-04-18 08:18:31 +00:00
|
|
|
makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(static_cast<UInt8>(2))),
|
2022-03-28 11:26:42 +00:00
|
|
|
std::make_shared<ASTLiteral>("Float64"));
|
|
|
|
|
2021-03-12 07:29:38 +00:00
|
|
|
func.name = "divide";
|
|
|
|
exp_list->children.push_back(new_arg1);
|
|
|
|
exp_list->children.push_back(new_arg2);
|
|
|
|
}
|
|
|
|
func.arguments = exp_list;
|
|
|
|
func.children.push_back(func.arguments);
|
|
|
|
}
|
2021-03-08 03:58:18 +00:00
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
void fuseSumCountAggregates(std::unordered_map<String, FuseSumCountAggregates> & fuse_map)
|
2021-03-12 07:29:38 +00:00
|
|
|
{
|
|
|
|
for (auto & it : fuse_map)
|
|
|
|
{
|
|
|
|
if (it.second.canBeFused())
|
|
|
|
{
|
|
|
|
for (auto & func: it.second.sums)
|
2021-04-15 16:40:49 +00:00
|
|
|
replaceWithSumCount(it.first, *func);
|
2021-03-12 07:29:38 +00:00
|
|
|
for (auto & func: it.second.avgs)
|
2021-04-15 16:40:49 +00:00
|
|
|
replaceWithSumCount(it.first, *func);
|
2021-03-12 07:29:38 +00:00
|
|
|
for (auto & func: it.second.counts)
|
2021-04-15 16:40:49 +00:00
|
|
|
replaceWithSumCount(it.first, *func);
|
2021-03-08 03:58:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
bool hasArrayJoin(const ASTPtr & ast)
|
|
|
|
{
|
|
|
|
if (const ASTFunction * function = ast->as<ASTFunction>())
|
|
|
|
if (function->name == "arrayJoin")
|
|
|
|
return true;
|
|
|
|
|
|
|
|
for (const auto & child : ast->children)
|
|
|
|
if (!child->as<ASTSelectQuery>() && hasArrayJoin(child))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Keep number of columns for 'GLOBAL IN (SELECT 1 AS a, a)'
|
|
|
|
void renameDuplicatedColumns(const ASTSelectQuery * select_query)
|
|
|
|
{
|
|
|
|
ASTs & elements = select_query->select()->children;
|
|
|
|
|
|
|
|
std::set<String> all_column_names;
|
|
|
|
std::set<String> assigned_column_names;
|
|
|
|
|
|
|
|
for (auto & expr : elements)
|
|
|
|
all_column_names.insert(expr->getAliasOrColumnName());
|
|
|
|
|
|
|
|
for (auto & expr : elements)
|
|
|
|
{
|
|
|
|
auto name = expr->getAliasOrColumnName();
|
|
|
|
|
|
|
|
if (!assigned_column_names.insert(name).second)
|
|
|
|
{
|
|
|
|
size_t i = 1;
|
|
|
|
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
|
|
|
|
++i;
|
|
|
|
|
|
|
|
name = name + "_" + toString(i);
|
|
|
|
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
|
|
|
|
expr->setAlias(name);
|
|
|
|
|
|
|
|
all_column_names.insert(name);
|
|
|
|
assigned_column_names.insert(name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
|
|
|
|
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
2020-01-11 09:50:41 +00:00
|
|
|
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
|
2022-04-07 05:21:24 +00:00
|
|
|
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
|
Revert "Fix converting types for UNION queries (may produce LOGICAL_ERROR)"
This fix is incorrect, and it introduce new issues, in particular it
may breaks UNION queries w/o column aliases, i.e.:
SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Reverts: #37593/#34775 (2613149f6bf4f242bbbf2c3c8539b5176fd77286)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-04 13:13:01 +00:00
|
|
|
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
|
|
|
ASTs & elements = select_query->select()->children;
|
|
|
|
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
std::unordered_map<String, size_t> required_columns_with_duplicate_count;
|
|
|
|
/// Order of output columns should match order in required_result_columns,
|
|
|
|
/// otherwise UNION queries may have incorrect header when subselect has duplicated columns.
|
|
|
|
///
|
|
|
|
/// NOTE: multimap is required since there can be duplicated column names.
|
|
|
|
std::unordered_multimap<String, size_t> output_columns_positions;
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
if (!required_result_columns.empty())
|
|
|
|
{
|
|
|
|
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
for (size_t i = 0; i < required_result_columns.size(); ++i)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
const auto & name = required_result_columns[i];
|
2019-12-27 19:45:41 +00:00
|
|
|
if (remove_dups)
|
|
|
|
required_columns_with_duplicate_count[name] = 1;
|
|
|
|
else
|
|
|
|
++required_columns_with_duplicate_count[name];
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
output_columns_positions.emplace(name, i);
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (remove_dups)
|
|
|
|
{
|
|
|
|
/// Even if we have no requirements there could be duplicates cause of asterisks. SELECT *, t.*
|
|
|
|
for (const auto & elem : elements)
|
|
|
|
required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return;
|
|
|
|
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
ASTs new_elements(elements.size() + output_columns_positions.size());
|
|
|
|
size_t new_elements_size = 0;
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2022-04-07 05:21:24 +00:00
|
|
|
NameSet remove_columns;
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
for (const auto & elem : elements)
|
|
|
|
{
|
|
|
|
String name = elem->getAliasOrColumnName();
|
|
|
|
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
/// Columns that are presented in output_columns_positions should
|
|
|
|
/// appears in the same order in the new_elements, hence default
|
|
|
|
/// result_index goes after all elements of output_columns_positions
|
|
|
|
/// (it is for columns that are not located in
|
|
|
|
/// output_columns_positions, i.e. untuple())
|
|
|
|
size_t result_index = output_columns_positions.size() + new_elements_size;
|
|
|
|
|
|
|
|
/// Note, order of duplicated columns is not important here (since they
|
|
|
|
/// are the same), only order for unique columns is important, so it is
|
|
|
|
/// fine to use multimap here.
|
|
|
|
if (auto it = output_columns_positions.find(name); it != output_columns_positions.end())
|
|
|
|
{
|
|
|
|
result_index = it->second;
|
|
|
|
output_columns_positions.erase(it);
|
|
|
|
}
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
auto it = required_columns_with_duplicate_count.find(name);
|
|
|
|
if (required_columns_with_duplicate_count.end() != it && it->second)
|
|
|
|
{
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
new_elements[result_index] = elem;
|
2019-12-27 19:45:41 +00:00
|
|
|
--it->second;
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
++new_elements_size;
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
else if (select_query->distinct || hasArrayJoin(elem))
|
|
|
|
{
|
Fix converting types for UNION queries (may produce LOGICAL_ERROR)
CI founds [1]:
2022.02.20 15:14:23.969247 [ 492 ] {} <Fatal> BaseDaemon: (version 22.3.1.1, build id: 6082C357CFA6FF99) (from thread 472) (query_id: a5187ff9-962a-4e7c-86f6-8d48850a47d6) (query: SELECT 0., round(avgWeighted(x, y)) FROM (SELECT toDate(toDate('214748364.8', '-922337203.6854775808', '-0.1', NULL) - NULL, 10.000100135803223, '-2147483647'), 255 AS x, -2147483647 AS y UNION ALL SELECT y, NULL AS x, 2147483646 AS y)) Received signal Aborted (6)
[1]: https://s3.amazonaws.com/clickhouse-test-reports/0/26d0e5438c86e52a145aaaf4cb523c399989a878/fuzzer_astfuzzerdebug,actions//report.html
The problem is that subqueries returns different headers:
- first query -- x, y
- second query -- y, x
v2: Make order of columns strict only for UNION
https://s3.amazonaws.com/clickhouse-test-reports/34775/9cc8c01a463d18c471853568b2f0af659a4e643f/stateless_tests__address__actions__[2/2].html
Fixes: 00597_push_down_predicate_long
v3: add no-backward-compatibility-check for the test
Fixes: #37569
Resubmit: #34775
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
(cherry picked from commit a813f5996e95e424193265bb090ef7a402497d6e)
2022-02-20 19:12:25 +00:00
|
|
|
/// ARRAY JOIN cannot be optimized out since it may change number of rows,
|
|
|
|
/// so as DISTINCT.
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
new_elements[result_index] = elem;
|
|
|
|
++new_elements_size;
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
2020-12-22 13:48:40 +00:00
|
|
|
else
|
|
|
|
{
|
2022-04-07 05:21:24 +00:00
|
|
|
remove_columns.insert(name);
|
|
|
|
|
2020-12-22 13:48:40 +00:00
|
|
|
ASTFunction * func = elem->as<ASTFunction>();
|
2021-04-12 12:15:55 +00:00
|
|
|
|
|
|
|
/// Never remove untuple. It's result column may be in required columns.
|
2021-12-07 08:09:39 +00:00
|
|
|
/// It is not easy to analyze untuple here, because types were not calculated yet.
|
2020-12-22 13:48:40 +00:00
|
|
|
if (func && func->name == "untuple")
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
{
|
|
|
|
new_elements[result_index] = elem;
|
|
|
|
++new_elements_size;
|
|
|
|
}
|
2021-12-07 08:09:39 +00:00
|
|
|
/// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong
|
2022-09-01 09:10:32 +00:00
|
|
|
if (func && !select_query->groupBy())
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
{
|
2022-09-01 09:10:32 +00:00
|
|
|
GetAggregatesVisitor::Data data = {};
|
|
|
|
GetAggregatesVisitor(data).visit(elem);
|
|
|
|
if (!data.aggregates.empty())
|
|
|
|
{
|
|
|
|
new_elements[result_index] = elem;
|
|
|
|
++new_elements_size;
|
|
|
|
}
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
}
|
2020-12-22 13:48:40 +00:00
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
Fix incorrect columns order in subqueries of UNION
Consider the following query:
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Here is UNION from two SELECT queries
- `SELECT NULL, 255 AS x, 1 AS y`
- `SELECT y, NULL AS x, 1 AS y`
UNION queries matches columns by positions, not names, so the following
columns should be used by `avgWeighted()`:
- `255 AS x, 1 AS y`
- `NULL AS x, 1 AS y`
Result types of arguments should be:
- `x Nullable(UInt8)`
- `y UInt8`
And in case of UNION query is a subselect itself, it will return only
required columns, for the example above it needs only `x` and `y`.
For this it will get positions of these arguments from the first query,
and then use those positions to get required column names from the
second query (since there is no ability to get columns by positions
instead of names internally), and due to duplicated columns the second
query will return (`y`, `x`) not (`x`, `y`), and this will make the
result incorrect:
EXPLAIN header = 1, optimize = 0, actions=1 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y)
Aggregates:
avgWeighted(x, y)
Function: avgWeighted(Nullable(UInt8), UInt8) → Nullable(Float64)
Arguments: x, y
Argument positions: 0, 1
Expression (Before GROUP BY)
Header: x UInt8
y Nullable(UInt8)
...
Union
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
Expression (Conversion before UNION)
Header: x UInt8
y Nullable(UInt8)
And the query itself fails with an error:
Logical error: 'Bad cast from type DB::ColumnVector<char8_t> to DB::ColumnNullable'.
_NOTE: `avgWeighted()` here is required to trigger `LOGICAL_ERROR`_
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Fixes: 02227_union_match_by_name
v2: fix untuple() (reserve space for output_columns_positions too)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-06 13:11:13 +00:00
|
|
|
/// Remove empty nodes.
|
|
|
|
std::erase(new_elements, ASTPtr{});
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2022-04-07 05:21:24 +00:00
|
|
|
if (select_query->interpolate())
|
|
|
|
{
|
|
|
|
auto & children = select_query->interpolate()->children;
|
|
|
|
if (!children.empty())
|
|
|
|
{
|
|
|
|
for (auto it = children.begin(); it != children.end();)
|
|
|
|
{
|
2022-04-18 10:18:43 +00:00
|
|
|
if (remove_columns.contains((*it)->as<ASTInterpolateElement>()->column))
|
2022-04-07 05:21:24 +00:00
|
|
|
it = select_query->interpolate()->children.erase(it);
|
|
|
|
else
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (children.empty())
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, nullptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
elements = std::move(new_elements);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Replacing scalar subqueries with constant values.
|
2022-01-17 18:32:55 +00:00
|
|
|
void executeScalarSubqueries(
|
|
|
|
ASTPtr & query, ContextPtr context, size_t subquery_depth, Scalars & scalars, Scalars & local_scalars, bool only_analyze)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
|
|
|
LogAST log;
|
2022-01-17 18:32:55 +00:00
|
|
|
ExecuteScalarSubqueriesVisitor::Data visitor_data{WithContext{context}, subquery_depth, scalars, local_scalars, only_analyze};
|
2019-12-27 19:45:41 +00:00
|
|
|
ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query);
|
|
|
|
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Fill the ARRAY JOIN related maps of `result` for `select_query`.
///
/// Does nothing if the query has no ARRAY JOIN expression list. Otherwise runs
/// ArrayJoinedColumnsVisitor over `query`; if after that no ARRAY JOIN result is mapped
/// to a source column, the first ARRAY JOIN expression is registered explicitly.
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the ARRAY JOIN expression list is empty,
/// and EMPTY_NESTED_TABLE if the expression is treated as a nested table but no source
/// column belongs to it.
void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const ASTSelectQuery * select_query,
                           const NamesAndTypesList & source_columns, const NameSet & source_columns_set)
{
    /// No ARRAY JOIN in the query - nothing to collect.
    if (!select_query->arrayJoinExpressionList().first)
        return;

    ArrayJoinedColumnsVisitor::Data visitor_data{
        result.aliases, result.array_join_name_to_alias, result.array_join_alias_to_name, result.array_join_result_to_source};
    ArrayJoinedColumnsVisitor(visitor_data).visit(query);

    /// Some result of ARRAY JOIN is already used, the number of rows will be correct.
    if (!result.array_join_result_to_source.empty())
        return;

    /// If the result of ARRAY JOIN is not used, it is necessary to ARRAY-JOIN any column,
    /// to get the correct number of rows.
    const ASTPtr array_join_list = select_query->arrayJoinExpressionList().first;
    if (array_join_list->children.empty())
        throw DB::Exception("ARRAY JOIN requires an argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    ASTPtr expr = array_join_list->children.at(0);
    String source_name = expr->getColumnName();
    String result_name = expr->getAliasOrColumnName();

    /// Anything that is not an identifier, or an identifier that is a known source column, is an array.
    const bool is_array = !expr->as<ASTIdentifier>() || source_columns_set.contains(source_name);
    if (is_array)
    {
        result.array_join_result_to_source[result_name] = source_name;
        return;
    }

    /// Otherwise this is a nested table: take the first source column that belongs to it.
    for (const auto & column : source_columns)
    {
        const auto [table_part, column_part] = Nested::splitName(column.name, /*reverse=*/ true);
        if (table_part != source_name || column_part.empty())
            continue;

        result.array_join_result_to_source[Nested::concatenateName(result_name, column_part)] = column.name;
        return;
    }

    throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
}
|
|
|
|
|
|
|
|
/// Resolve the strictness of the JOIN in `select_query` and copy the resulting
/// ASTTableJoin into `out_table_join`. Does nothing if the query has no JOIN.
///
/// - An unspecified strictness of a non-CROSS join is replaced by `join_default_strictness`,
///   which must be ANY or ALL (otherwise EXPECTED_ALL_OR_ANY is thrown).
/// - With `old_any` set, ANY INNER is rewritten to SEMI LEFT and any other ANY join to RightAny.
/// - Without `old_any`, ANY FULL is rejected with NOT_IMPLEMENTED.
///
/// Note: the JOIN node inside `select_query` is modified in place.
void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, ASTTableJoin & out_table_join)
{
    const ASTTablesInSelectQueryElement * join_element = select_query.join();
    if (join_element == nullptr)
        return;

    auto & table_join = const_cast<ASTTablesInSelectQueryElement *>(join_element)->table_join->as<ASTTableJoin &>();

    const bool needs_default_strictness
        = table_join.strictness == JoinStrictness::Unspecified && table_join.kind != JoinKind::Cross;
    if (needs_default_strictness)
    {
        const bool default_is_any_or_all
            = join_default_strictness == JoinStrictness::Any || join_default_strictness == JoinStrictness::All;
        if (!default_is_any_or_all)
            throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty",
                            DB::ErrorCodes::EXPECTED_ALL_OR_ANY);

        table_join.strictness = join_default_strictness;
    }

    const bool is_any = table_join.strictness == JoinStrictness::Any;
    if (old_any)
    {
        if (is_any)
        {
            if (table_join.kind == JoinKind::Inner)
            {
                /// ANY INNER becomes SEMI LEFT.
                table_join.strictness = JoinStrictness::Semi;
                table_join.kind = JoinKind::Left;
            }
            else
            {
                /// Every other ANY join becomes RightAny.
                table_join.strictness = JoinStrictness::RightAny;
            }
        }
    }
    else if (is_any && table_join.kind == JoinKind::Full)
    {
        throw Exception("ANY FULL JOINs are not implemented", ErrorCodes::NOT_IMPLEMENTED);
    }

    out_table_join = table_join;
}
|
|
|
|
|
2021-11-08 12:44:13 +00:00
|
|
|
/// Evaluate expression and return boolean value if it can be interpreted as bool.
|
|
|
|
/// Only UInt8 or NULL are allowed.
|
|
|
|
/// Returns `false` for 0 or NULL values, `true` for any non-negative value.
|
|
|
|
std::optional<bool> tryEvaluateConstCondition(ASTPtr expr, ContextPtr context)
|
|
|
|
{
|
|
|
|
if (!expr)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
Field eval_res;
|
|
|
|
DataTypePtr eval_res_type;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
std::tie(eval_res, eval_res_type) = evaluateConstantExpression(expr, context);
|
|
|
|
}
|
|
|
|
catch (DB::Exception &)
|
|
|
|
{
|
|
|
|
/// not a constant expression
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
/// UInt8, maybe Nullable, maybe LowCardinality, and NULL are allowed
|
|
|
|
eval_res_type = removeNullable(removeLowCardinality(eval_res_type));
|
|
|
|
if (auto which = WhichDataType(eval_res_type); !which.isUInt8() && !which.isNothing())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
if (eval_res.isNull())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
UInt8 res = eval_res.template safeGet<UInt8>();
|
|
|
|
return res > 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Handle JOIN ON with a constant condition.
///
/// Returns false (leaving everything untouched) if `on_expression` cannot be evaluated
/// by tryEvaluateConstCondition. Otherwise `on_expression` is reset to nullptr and the join
/// is turned into a cross join (condition is true) or its keys are reset (condition is false),
/// and true is returned.
///
/// Throws NOT_IMPLEMENTED if the condition is constant but the 'hash' join algorithm is not enabled.
bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPtr context)
{
    const auto const_condition = tryEvaluateConstCondition(on_expression, context);
    if (!const_condition.has_value())
        return false;

    if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                        "JOIN ON constant ({}) supported only with join algorithm 'hash'",
                        queryToString(on_expression));

    /// The expression is consumed here; the message above had to be built before this point.
    on_expression = nullptr;

    if (*const_condition)
    {
        LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join");
        analyzed_join.resetToCross();
    }
    else
    {
        LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join");
        analyzed_join.resetKeys();
    }

    return true;
}
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
/// Find the columns that are obtained by JOIN.
|
2021-11-08 12:44:13 +00:00
|
|
|
void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
|
|
|
|
const TablesWithColumns & tables, const Aliases & aliases, ContextPtr context)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2021-06-25 10:46:19 +00:00
|
|
|
assert(tables.size() >= 2);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
if (table_join.using_expression_list)
|
|
|
|
{
|
|
|
|
const auto & keys = table_join.using_expression_list->as<ASTExpressionList &>();
|
2021-09-24 16:21:05 +00:00
|
|
|
|
|
|
|
analyzed_join.addDisjunct();
|
2019-12-27 19:45:41 +00:00
|
|
|
for (const auto & key : keys.children)
|
|
|
|
analyzed_join.addUsingKey(key);
|
|
|
|
}
|
|
|
|
else if (table_join.on_expression)
|
|
|
|
{
|
2022-07-29 16:30:50 +00:00
|
|
|
bool is_asof = (table_join.strictness == JoinStrictness::Asof);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2021-04-07 11:57:20 +00:00
|
|
|
CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof};
|
2021-09-24 16:21:05 +00:00
|
|
|
if (auto * or_func = table_join.on_expression->as<ASTFunction>(); or_func && or_func->name == "or")
|
2021-07-21 17:03:33 +00:00
|
|
|
{
|
2021-09-24 16:21:05 +00:00
|
|
|
for (auto & disjunct : or_func->arguments->children)
|
2021-06-25 12:03:10 +00:00
|
|
|
{
|
2021-09-24 16:21:05 +00:00
|
|
|
analyzed_join.addDisjunct();
|
|
|
|
CollectJoinOnKeysVisitor(data).visit(disjunct);
|
2021-06-25 12:03:10 +00:00
|
|
|
}
|
2021-09-24 16:21:05 +00:00
|
|
|
assert(analyzed_join.getClauses().size() == or_func->arguments->children.size());
|
|
|
|
}
|
|
|
|
else
|
2021-07-21 17:03:33 +00:00
|
|
|
{
|
2021-09-24 16:21:05 +00:00
|
|
|
analyzed_join.addDisjunct();
|
|
|
|
CollectJoinOnKeysVisitor(data).visit(table_join.on_expression);
|
|
|
|
assert(analyzed_join.oneDisjunct());
|
2021-07-21 17:03:33 +00:00
|
|
|
}
|
|
|
|
|
2021-11-08 12:44:13 +00:00
|
|
|
auto check_keys_empty = [] (auto e) { return e.key_names_left.empty(); };
|
2021-09-24 16:21:05 +00:00
|
|
|
|
2021-11-08 12:44:13 +00:00
|
|
|
/// All clauses should to have keys or be empty simultaneously
|
|
|
|
bool all_keys_empty = std::all_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty);
|
|
|
|
if (all_keys_empty)
|
|
|
|
{
|
|
|
|
/// Try join on constant (cross or empty join) or fail
|
|
|
|
if (is_asof)
|
|
|
|
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
|
|
|
"Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
|
|
|
|
|
|
|
|
bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context);
|
|
|
|
if (!join_on_const_ok)
|
|
|
|
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
|
|
|
"Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
|
|
|
|
}
|
|
|
|
else
|
2021-09-24 16:21:05 +00:00
|
|
|
{
|
2021-11-08 12:44:13 +00:00
|
|
|
bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty);
|
|
|
|
|
|
|
|
if (any_keys_empty)
|
2021-09-24 16:21:05 +00:00
|
|
|
throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
|
|
|
"Cannot get JOIN keys from JOIN ON section: '{}'",
|
|
|
|
queryToString(table_join.on_expression));
|
2021-07-21 17:03:33 +00:00
|
|
|
|
2021-11-08 12:44:13 +00:00
|
|
|
if (is_asof)
|
|
|
|
{
|
|
|
|
if (!analyzed_join.oneDisjunct())
|
|
|
|
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section");
|
|
|
|
data.asofToJoinKeys();
|
|
|
|
}
|
2021-09-13 09:34:34 +00:00
|
|
|
|
2022-07-15 14:57:58 +00:00
|
|
|
if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH))
|
2021-11-08 12:44:13 +00:00
|
|
|
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section");
|
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-19 00:00:09 +00:00
|
|
|
|
2022-10-20 10:03:00 +00:00
|
|
|
std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(const ASTFunction & function, ASTExpressionList & into)
|
2022-10-12 16:50:56 +00:00
|
|
|
{
|
2022-10-19 00:00:09 +00:00
|
|
|
checkStackSize();
|
2022-10-19 04:15:43 +00:00
|
|
|
|
2022-10-20 10:03:00 +00:00
|
|
|
if (AggregateUtils::isAggregateFunction(function))
|
|
|
|
return {true, 0};
|
2022-10-19 04:15:43 +00:00
|
|
|
|
2022-10-20 10:03:00 +00:00
|
|
|
UInt64 pushed_children = 0;
|
|
|
|
bool has_aggregate = false;
|
2022-10-19 04:15:43 +00:00
|
|
|
|
2022-10-20 10:03:00 +00:00
|
|
|
for (const auto & child : function.arguments->children)
|
2022-10-12 16:50:56 +00:00
|
|
|
{
|
2022-10-19 00:00:09 +00:00
|
|
|
if (child->as<ASTIdentifier>())
|
2022-10-20 10:03:00 +00:00
|
|
|
{
|
2022-10-19 00:00:09 +00:00
|
|
|
into.children.push_back(child);
|
2022-10-20 10:03:00 +00:00
|
|
|
pushed_children++;
|
|
|
|
}
|
|
|
|
else if (child->as<ASTFunction>())
|
|
|
|
{
|
|
|
|
const auto * child_func = child->as<ASTFunction>();
|
|
|
|
|
|
|
|
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*child_func, into);
|
|
|
|
|
|
|
|
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
|
|
|
/// so use the current function to replace its children
|
|
|
|
if (!child_has_aggregate)
|
|
|
|
{
|
|
|
|
for (UInt64 i = 0; i < child_pushed_children; i++)
|
|
|
|
{
|
|
|
|
into.children.pop_back();
|
|
|
|
}
|
|
|
|
into.children.push_back(child);
|
|
|
|
pushed_children = pushed_children - child_pushed_children + 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
has_aggregate = true;
|
|
|
|
pushed_children += child_pushed_children;
|
|
|
|
}
|
|
|
|
}
|
2022-10-12 16:50:56 +00:00
|
|
|
}
|
2022-10-20 10:03:00 +00:00
|
|
|
|
|
|
|
return {has_aggregate, pushed_children};
|
2022-10-12 16:50:56 +00:00
|
|
|
}
|
|
|
|
|
2022-10-20 10:03:00 +00:00
|
|
|
/// Expand GROUP BY ALL
|
2022-10-19 04:15:43 +00:00
|
|
|
void expandGroupByAll(ASTSelectQuery * select_query)
|
2022-10-12 16:50:56 +00:00
|
|
|
{
|
|
|
|
auto group_expression_list = std::make_shared<ASTExpressionList>();
|
|
|
|
|
2022-10-19 00:00:09 +00:00
|
|
|
for (const auto & expr : select_query->select()->children)
|
2022-10-12 16:50:56 +00:00
|
|
|
{
|
|
|
|
if (expr->as<ASTIdentifier>())
|
|
|
|
group_expression_list->children.push_back(expr);
|
2022-10-20 10:03:00 +00:00
|
|
|
else if (expr->as<ASTFunction>())
|
|
|
|
{
|
|
|
|
auto [has_aggregate, pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*expr->as<ASTFunction>(), *group_expression_list);
|
|
|
|
|
|
|
|
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
|
|
|
/// so use the current function to replace its children
|
|
|
|
if (!has_aggregate)
|
|
|
|
{
|
|
|
|
for (UInt64 i = 0; i < pushed_children; i++)
|
|
|
|
{
|
|
|
|
group_expression_list->children.pop_back();
|
|
|
|
}
|
|
|
|
group_expression_list->children.push_back(expr);
|
|
|
|
}
|
|
|
|
}
|
2022-10-12 16:50:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
|
|
|
|
}
|
|
|
|
|
2020-02-27 15:06:04 +00:00
|
|
|
/// Returns the aggregate function nodes found in `query` by GetAggregatesVisitor.
/// Asserts that there are no aggregate functions in WHERE and PREWHERE of `select_query`,
/// and that arguments of the found aggregate functions contain neither aggregate nor window functions.
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
    /// Aggregate functions are not allowed inside WHERE and PREWHERE.
    if (const auto where_expression = select_query.where())
        assertNoAggregates(where_expression, "in WHERE");
    if (const auto prewhere_expression = select_query.prewhere())
        assertNoAggregates(prewhere_expression, "in PREWHERE");

    GetAggregatesVisitor::Data visitor_data;
    GetAggregatesVisitor(visitor_data).visit(query);

    for (const ASTFunction * aggregate : visitor_data.aggregates)
    {
        if (!aggregate->arguments)
            continue;

        for (const auto & argument : aggregate->arguments->children)
        {
            /// Aggregate functions cannot be nested into each other.
            assertNoAggregates(argument, "inside another aggregate function");
            /// Window functions are calculated later than aggregate functions,
            /// so they cannot be inside aggregate functions either.
            assertNoWindows(argument, "inside an aggregate function");
        }
    }

    return visitor_data.aggregates;
}
|
|
|
|
|
2020-12-09 11:14:40 +00:00
|
|
|
/// Returns the window function nodes found in `query` by GetAggregatesVisitor.
/// Asserts that there are no window functions in HAVING, WHERE, PREWHERE and WINDOW of `select_query`,
/// in arguments of the found window functions and in their window definitions.
std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query)
{
    /// Window functions are not allowed inside HAVING, WHERE, PREWHERE and WINDOW.
    if (const auto having_expression = select_query.having())
        assertNoWindows(having_expression, "in HAVING");
    if (const auto where_expression = select_query.where())
        assertNoWindows(where_expression, "in WHERE");
    if (const auto prewhere_expression = select_query.prewhere())
        assertNoWindows(prewhere_expression, "in PREWHERE");
    if (const auto window_expression = select_query.window())
        assertNoWindows(window_expression, "in WINDOW");

    GetAggregatesVisitor::Data visitor_data;
    GetAggregatesVisitor(visitor_data).visit(query);

    /// Window functions cannot be inside aggregates or other window functions.
    /// Aggregate functions can be inside window functions, because they are calculated earlier.
    for (const ASTFunction * window_function : visitor_data.window_functions)
    {
        if (window_function->arguments)
            for (const auto & argument : window_function->arguments->children)
                assertNoWindows(argument, "inside another window function");

        if (window_function->window_definition)
            assertNoWindows(window_function->window_definition, "inside window definition");
    }

    return visitor_data.window_functions;
}
|
|
|
|
|
2021-08-03 18:03:24 +00:00
|
|
|
class MarkTupleLiteralsAsLegacyData
|
|
|
|
{
|
|
|
|
public:
|
2022-06-27 12:13:21 +00:00
|
|
|
struct Data
|
|
|
|
{
|
|
|
|
};
|
2021-08-03 18:03:24 +00:00
|
|
|
|
2022-06-27 12:13:21 +00:00
|
|
|
static void visitLiteral(ASTLiteral & literal, ASTPtr &)
|
2021-08-03 18:03:24 +00:00
|
|
|
{
|
|
|
|
if (literal.value.getType() == Field::Types::Tuple)
|
|
|
|
literal.use_legacy_column_name_of_tuple = true;
|
|
|
|
}
|
2022-06-27 12:13:21 +00:00
|
|
|
static void visitFunction(ASTFunction & func, ASTPtr &ast)
|
|
|
|
{
|
|
|
|
if (func.name == "tuple" && func.arguments && !func.arguments->children.empty())
|
|
|
|
{
|
|
|
|
// re-write tuple() function as literal
|
|
|
|
if (auto literal = func.toLiteral())
|
|
|
|
{
|
|
|
|
ast = literal;
|
|
|
|
visitLiteral(*typeid_cast<ASTLiteral *>(ast.get()), ast);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void visit(ASTPtr & ast, Data &)
|
|
|
|
{
|
|
|
|
if (auto * identifier = typeid_cast<ASTFunction *>(ast.get()))
|
|
|
|
visitFunction(*identifier, ast);
|
|
|
|
if (auto * identifier = typeid_cast<ASTLiteral *>(ast.get()))
|
|
|
|
visitLiteral(*identifier, ast);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool needChildVisit(const ASTPtr & /*parent*/, const ASTPtr & /*child*/)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
2021-08-03 18:03:24 +00:00
|
|
|
};
|
|
|
|
|
2022-06-27 12:13:21 +00:00
|
|
|
/// AST visitor that applies MarkTupleLiteralsAsLegacyData to the nodes of a query tree.
using MarkTupleLiteralsAsLegacyVisitor = InDepthNodeVisitor<MarkTupleLiteralsAsLegacyData, true>;
|
2021-08-03 18:03:24 +00:00
|
|
|
|
|
|
|
/// Runs MarkTupleLiteralsAsLegacyVisitor over `query`.
void markTupleLiteralsAsLegacy(ASTPtr & query)
{
    MarkTupleLiteralsAsLegacyVisitor::Data visitor_data;
    MarkTupleLiteralsAsLegacyVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2022-01-10 18:21:24 +00:00
|
|
|
/// Rewrite _shard_num -> shardNum() AS _shard_num
|
|
|
|
struct RewriteShardNum
|
|
|
|
{
|
|
|
|
struct Data
|
|
|
|
{
|
|
|
|
};
|
|
|
|
|
|
|
|
static bool needChildVisit(const ASTPtr & parent, const ASTPtr & /*child*/)
|
|
|
|
{
|
|
|
|
/// ON section should not be rewritten.
|
|
|
|
return typeid_cast<ASTTableJoin *>(parent.get()) == nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void visit(ASTPtr & ast, Data &)
|
|
|
|
{
|
|
|
|
if (auto * identifier = typeid_cast<ASTIdentifier *>(ast.get()))
|
|
|
|
visit(*identifier, ast);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void visit(ASTIdentifier & identifier, ASTPtr & ast)
|
|
|
|
{
|
|
|
|
if (identifier.shortName() != "_shard_num")
|
|
|
|
return;
|
|
|
|
|
|
|
|
String alias = identifier.tryGetAlias();
|
|
|
|
if (alias.empty())
|
|
|
|
alias = "_shard_num";
|
|
|
|
ast = makeASTFunction("shardNum");
|
|
|
|
ast->setAlias(alias);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
/// AST visitor that applies RewriteShardNum to the nodes of a query tree.
using RewriteShardNumVisitor = InDepthNodeVisitor<RewriteShardNum, true>;
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
|
2020-11-13 15:56:25 +00:00
|
|
|
/// Stores the storage, its snapshot and the initial source columns,
/// then extends the source columns with the columns of the storage (see collectSourceColumns)
/// and remembers whether the storage is remote.
TreeRewriterResult::TreeRewriterResult(
    const NamesAndTypesList & source_columns_,
    ConstStoragePtr storage_,
    const StorageSnapshotPtr & storage_snapshot_,
    bool add_special)
    : storage(storage_)
    , storage_snapshot(storage_snapshot_)
    , source_columns(source_columns_)
{
    collectSourceColumns(add_special);

    /// Without a storage there is nothing remote.
    if (storage)
        is_remote_storage = storage->isRemote();
    else
        is_remote_storage = false;
}
|
|
|
|
|
2020-03-03 14:25:45 +00:00
|
|
|
/// Add columns from storage to source_columns list. Deduplicate resulted list.
|
2020-04-29 12:15:23 +00:00
|
|
|
/// Special columns are non physical columns, for example ALIAS
|
2020-07-22 17:13:05 +00:00
|
|
|
void TreeRewriterResult::collectSourceColumns(bool add_special)
|
2020-03-03 14:25:45 +00:00
|
|
|
{
|
|
|
|
if (storage)
|
|
|
|
{
|
2021-05-04 23:02:54 +00:00
|
|
|
auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical);
|
|
|
|
options.withExtendedObjects();
|
2020-12-22 16:40:53 +00:00
|
|
|
if (storage->supportsSubcolumns())
|
2021-05-04 23:02:54 +00:00
|
|
|
options.withSubcolumns();
|
2021-04-24 04:09:01 +00:00
|
|
|
|
2021-07-09 03:15:41 +00:00
|
|
|
auto columns_from_storage = storage_snapshot->getColumns(options);
|
2021-04-24 04:09:01 +00:00
|
|
|
|
2020-03-03 14:25:45 +00:00
|
|
|
if (source_columns.empty())
|
|
|
|
source_columns.swap(columns_from_storage);
|
|
|
|
else
|
|
|
|
source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
source_columns_set = removeDuplicateColumns(source_columns);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
/// Calculate which columns are required to execute the expression.
|
|
|
|
/// Then, delete all other columns from the list of available columns.
|
|
|
|
/// After execution, columns will only contain the list of columns needed to read from the table.
|
2022-07-28 12:24:16 +00:00
|
|
|
void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2020-01-11 09:50:41 +00:00
|
|
|
/// We calculate required_source_columns with source_columns modifications and swap them on exit
|
2019-12-27 19:45:41 +00:00
|
|
|
required_source_columns = source_columns;
|
|
|
|
|
|
|
|
RequiredSourceColumnsVisitor::Data columns_context;
|
2022-07-28 12:24:16 +00:00
|
|
|
columns_context.visit_index_hint = visit_index_hint;
|
2019-12-27 19:45:41 +00:00
|
|
|
RequiredSourceColumnsVisitor(columns_context).visit(query);
|
|
|
|
|
|
|
|
NameSet source_column_names;
|
|
|
|
for (const auto & column : source_columns)
|
|
|
|
source_column_names.insert(column.name);
|
|
|
|
|
|
|
|
NameSet required = columns_context.requiredColumns();
|
|
|
|
if (columns_context.has_table_join)
|
|
|
|
{
|
2020-01-11 09:50:41 +00:00
|
|
|
NameSet available_columns;
|
2019-12-27 19:45:41 +00:00
|
|
|
for (const auto & name : source_columns)
|
2020-01-11 09:50:41 +00:00
|
|
|
available_columns.insert(name.name);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
/// Add columns obtained by JOIN (if needed).
|
|
|
|
for (const auto & joined_column : analyzed_join->columnsFromJoinedTable())
|
|
|
|
{
|
2020-04-22 06:01:33 +00:00
|
|
|
const auto & name = joined_column.name;
|
2022-04-18 10:18:43 +00:00
|
|
|
if (available_columns.contains(name))
|
2019-12-27 19:45:41 +00:00
|
|
|
continue;
|
|
|
|
|
2022-04-18 10:18:43 +00:00
|
|
|
if (required.contains(name))
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
|
|
|
/// Optimisation: do not add columns needed only in JOIN ON section.
|
|
|
|
if (columns_context.nameInclusion(name) > analyzed_join->rightKeyInclusion(name))
|
|
|
|
analyzed_join->addJoinedColumn(joined_column);
|
|
|
|
|
|
|
|
required.erase(name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
NameSet array_join_sources;
|
|
|
|
if (columns_context.has_array_join)
|
|
|
|
{
|
|
|
|
/// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
|
|
|
|
for (const auto & result_source : array_join_result_to_source)
|
|
|
|
array_join_sources.insert(result_source.second);
|
|
|
|
|
|
|
|
for (const auto & column_name_type : source_columns)
|
2022-04-18 10:18:43 +00:00
|
|
|
if (array_join_sources.contains(column_name_type.name))
|
2019-12-27 19:45:41 +00:00
|
|
|
required.insert(column_name_type.name);
|
|
|
|
}
|
|
|
|
|
2020-12-18 20:09:39 +00:00
|
|
|
/// Figure out if we're able to use the trivial count optimization.
|
|
|
|
has_explicit_columns = !required.empty();
|
|
|
|
if (is_select && !has_explicit_columns)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2022-09-09 17:56:38 +00:00
|
|
|
optimize_trivial_count = !columns_context.has_array_join;
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2020-12-18 20:09:39 +00:00
|
|
|
/// You need to read at least one column to find the number of rows.
|
2019-12-27 19:45:41 +00:00
|
|
|
/// We will find a column with minimum <compressed_size, type_size, uncompressed_size>.
|
|
|
|
/// Because it is the column that is cheapest to read.
|
|
|
|
struct ColumnSizeTuple
|
|
|
|
{
|
|
|
|
size_t compressed_size;
|
|
|
|
size_t type_size;
|
|
|
|
size_t uncompressed_size;
|
|
|
|
String name;
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
bool operator<(const ColumnSizeTuple & that) const
|
|
|
|
{
|
|
|
|
return std::tie(compressed_size, type_size, uncompressed_size)
|
|
|
|
< std::tie(that.compressed_size, that.type_size, that.uncompressed_size);
|
|
|
|
}
|
|
|
|
};
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
std::vector<ColumnSizeTuple> columns;
|
|
|
|
if (storage)
|
|
|
|
{
|
|
|
|
auto column_sizes = storage->getColumnSizes();
|
|
|
|
for (auto & source_column : source_columns)
|
|
|
|
{
|
|
|
|
auto c = column_sizes.find(source_column.name);
|
|
|
|
if (c == column_sizes.end())
|
|
|
|
continue;
|
|
|
|
size_t type_size = source_column.type->haveMaximumSizeOfValue() ? source_column.type->getMaximumSizeOfValueInMemory() : 100;
|
|
|
|
columns.emplace_back(ColumnSizeTuple{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name});
|
|
|
|
}
|
|
|
|
}
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2020-03-09 00:28:05 +00:00
|
|
|
if (!columns.empty())
|
2019-12-27 19:45:41 +00:00
|
|
|
required.insert(std::min_element(columns.begin(), columns.end())->name);
|
2021-06-28 20:08:18 +00:00
|
|
|
else if (!source_columns.empty())
|
2019-12-27 19:45:41 +00:00
|
|
|
/// If we have no information about columns sizes, choose a column of minimum size of its data type.
|
|
|
|
required.insert(ExpressionActions::getSmallestColumn(source_columns));
|
|
|
|
}
|
2021-07-09 03:15:41 +00:00
|
|
|
else if (is_select && storage_snapshot && !columns_context.has_array_join)
|
2020-09-21 10:13:01 +00:00
|
|
|
{
|
2021-07-09 03:15:41 +00:00
|
|
|
const auto & partition_desc = storage_snapshot->metadata->getPartitionKey();
|
2020-09-21 10:13:01 +00:00
|
|
|
if (partition_desc.expression)
|
|
|
|
{
|
2021-03-03 08:36:20 +00:00
|
|
|
auto partition_source_columns = partition_desc.expression->getRequiredColumns();
|
|
|
|
partition_source_columns.push_back("_part");
|
|
|
|
partition_source_columns.push_back("_partition_id");
|
|
|
|
partition_source_columns.push_back("_part_uuid");
|
2021-04-27 08:15:59 +00:00
|
|
|
partition_source_columns.push_back("_partition_value");
|
2020-09-21 10:13:01 +00:00
|
|
|
optimize_trivial_count = true;
|
|
|
|
for (const auto & required_column : required)
|
|
|
|
{
|
|
|
|
if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column)
|
|
|
|
== partition_source_columns.end())
|
|
|
|
{
|
2020-12-13 09:33:02 +00:00
|
|
|
optimize_trivial_count = false;
|
|
|
|
break;
|
2020-09-21 10:13:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
NameSet unknown_required_source_columns = required;
|
|
|
|
|
|
|
|
for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();)
|
|
|
|
{
|
|
|
|
const String & column_name = it->name;
|
|
|
|
unknown_required_source_columns.erase(column_name);
|
|
|
|
|
2022-04-18 10:18:43 +00:00
|
|
|
if (!required.contains(column_name))
|
2021-12-20 10:42:31 +00:00
|
|
|
it = source_columns.erase(it);
|
2019-12-27 19:45:41 +00:00
|
|
|
else
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
|
2022-01-10 18:21:24 +00:00
|
|
|
has_virtual_shard_num = false;
|
2019-12-27 19:45:41 +00:00
|
|
|
/// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add
|
|
|
|
/// in columns list, so that when further processing they are also considered.
|
|
|
|
if (storage)
|
|
|
|
{
|
2020-04-27 13:55:30 +00:00
|
|
|
const auto storage_virtuals = storage->getVirtuals();
|
2019-12-27 19:45:41 +00:00
|
|
|
for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
|
|
|
|
{
|
2020-04-24 10:20:03 +00:00
|
|
|
auto column = storage_virtuals.tryGetByName(*it);
|
|
|
|
if (column)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2020-04-24 10:20:03 +00:00
|
|
|
source_columns.push_back(*column);
|
2021-12-20 10:42:31 +00:00
|
|
|
it = unknown_required_source_columns.erase(it);
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
++it;
|
|
|
|
}
|
2022-01-10 18:21:24 +00:00
|
|
|
|
|
|
|
if (is_remote_storage)
|
|
|
|
{
|
|
|
|
for (const auto & name_type : storage_virtuals)
|
|
|
|
{
|
2022-01-30 00:24:37 +00:00
|
|
|
if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()))
|
2022-01-10 18:21:24 +00:00
|
|
|
{
|
|
|
|
has_virtual_shard_num = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!unknown_required_source_columns.empty())
|
|
|
|
{
|
2020-11-10 18:22:26 +00:00
|
|
|
WriteBufferFromOwnString ss;
|
2019-12-27 19:45:41 +00:00
|
|
|
ss << "Missing columns:";
|
|
|
|
for (const auto & name : unknown_required_source_columns)
|
|
|
|
ss << " '" << name << "'";
|
|
|
|
ss << " while processing query: '" << queryToString(query) << "'";
|
|
|
|
|
|
|
|
ss << ", required columns:";
|
|
|
|
for (const auto & name : columns_context.requiredColumns())
|
|
|
|
ss << " '" << name << "'";
|
|
|
|
|
2020-12-21 13:46:55 +00:00
|
|
|
if (storage)
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
2021-01-29 08:14:34 +00:00
|
|
|
std::vector<String> hint_name{};
|
2020-12-21 13:46:55 +00:00
|
|
|
for (const auto & name : columns_context.requiredColumns())
|
|
|
|
{
|
|
|
|
auto hints = storage->getHints(name);
|
2021-01-29 08:14:34 +00:00
|
|
|
hint_name.insert(hint_name.end(), hints.begin(), hints.end());
|
2021-01-23 05:45:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!hint_name.empty())
|
|
|
|
{
|
|
|
|
ss << ", maybe you meant: ";
|
2021-01-29 08:14:34 +00:00
|
|
|
ss << toString(hint_name);
|
2020-12-21 13:46:55 +00:00
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-12-21 13:46:55 +00:00
|
|
|
if (!source_column_names.empty())
|
|
|
|
for (const auto & name : columns_context.requiredColumns())
|
|
|
|
ss << " '" << name << "'";
|
|
|
|
else
|
|
|
|
ss << ", no source columns";
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (columns_context.has_table_join)
|
|
|
|
{
|
|
|
|
ss << ", joined columns:";
|
|
|
|
for (const auto & column : analyzed_join->columnsFromJoinedTable())
|
|
|
|
ss << " '" << column.name << "'";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!array_join_sources.empty())
|
|
|
|
{
|
|
|
|
ss << ", arrayJoin columns:";
|
|
|
|
for (const auto & name : array_join_sources)
|
|
|
|
ss << " '" << name << "'";
|
|
|
|
}
|
|
|
|
|
|
|
|
throw Exception(ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
|
|
}
|
|
|
|
|
|
|
|
required_source_columns.swap(source_columns);
|
2021-06-25 12:03:10 +00:00
|
|
|
for (const auto & column : required_source_columns)
|
|
|
|
{
|
|
|
|
source_column_names.insert(column.name);
|
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
|
2020-12-12 16:42:15 +00:00
|
|
|
/// Returns the set of keys of `array_join_result_to_source`.
NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
    NameSet key_names;
    for (const auto & [key_name, _] : array_join_result_to_source)
        key_names.insert(key_name);
    return key_names;
}
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
2019-12-27 19:45:41 +00:00
|
|
|
ASTPtr & query,
|
2020-07-22 17:13:05 +00:00
|
|
|
TreeRewriterResult && result,
|
2020-02-26 19:33:09 +00:00
|
|
|
const SelectQueryOptions & select_options,
|
2020-03-03 14:25:45 +00:00
|
|
|
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
|
2020-04-08 18:59:52 +00:00
|
|
|
const Names & required_result_columns,
|
|
|
|
std::shared_ptr<TableJoin> table_join) const
|
2019-12-27 19:45:41 +00:00
|
|
|
{
|
|
|
|
auto * select_query = query->as<ASTSelectQuery>();
|
2020-02-26 19:33:09 +00:00
|
|
|
if (!select_query)
|
|
|
|
throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
2022-10-12 16:50:56 +00:00
|
|
|
// expand GROUP BY ALL
|
|
|
|
if (select_query->group_by_all)
|
2022-10-19 00:00:09 +00:00
|
|
|
expandGroupByAll(select_query);
|
2022-10-12 16:50:56 +00:00
|
|
|
|
2020-02-26 19:33:09 +00:00
|
|
|
size_t subquery_depth = select_options.subquery_depth;
|
|
|
|
bool remove_duplicates = select_options.remove_duplicates;
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
const auto & settings = getContext()->getSettingsRef();
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-03-03 14:25:45 +00:00
|
|
|
const NameSet & source_columns_set = result.source_columns_set;
|
2020-05-20 09:29:23 +00:00
|
|
|
|
|
|
|
if (table_join)
|
|
|
|
{
|
|
|
|
result.analyzed_join = table_join;
|
|
|
|
result.analyzed_join->resetCollected();
|
|
|
|
}
|
|
|
|
else /// TODO: remove. For now ExpressionAnalyzer expects some not empty object here
|
2020-04-08 18:59:52 +00:00
|
|
|
result.analyzed_join = std::make_shared<TableJoin>();
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-02-27 14:33:03 +00:00
|
|
|
if (remove_duplicates)
|
|
|
|
renameDuplicatedColumns(select_query);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2022-05-30 13:50:22 +00:00
|
|
|
/// Perform it before analyzing JOINs, because it may change number of columns with names unique and break some logic inside JOINs
|
2022-05-10 14:24:20 +00:00
|
|
|
if (settings.optimize_normalize_count_variants)
|
|
|
|
TreeOptimizer::optimizeCountConstantAndSumOne(query);
|
|
|
|
|
2020-03-03 14:25:45 +00:00
|
|
|
if (tables_with_columns.size() > 1)
|
2020-02-27 14:33:03 +00:00
|
|
|
{
|
2021-06-23 14:03:39 +00:00
|
|
|
const auto & right_table = tables_with_columns[1];
|
|
|
|
auto & cols_from_joined = result.analyzed_join->columns_from_joined_table;
|
|
|
|
cols_from_joined = right_table.columns;
|
2021-06-24 14:57:21 +00:00
|
|
|
/// query can use materialized or aliased columns from right joined table,
|
|
|
|
/// we want to request it for right table
|
|
|
|
cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end());
|
2021-06-23 14:03:39 +00:00
|
|
|
|
2020-02-27 14:33:03 +00:00
|
|
|
result.analyzed_join->deduplicateAndQualifyColumnNames(
|
2021-06-23 14:03:39 +00:00
|
|
|
source_columns_set, right_table.table.getQualifiedNamePrefix());
|
2020-02-27 14:33:03 +00:00
|
|
|
}
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-06-05 21:17:00 +00:00
|
|
|
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2020-02-27 14:33:03 +00:00
|
|
|
/// Optimizes logical expressions.
|
|
|
|
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2021-03-30 13:51:45 +00:00
|
|
|
NameSet all_source_columns_set = source_columns_set;
|
|
|
|
if (table_join)
|
|
|
|
{
|
|
|
|
for (const auto & [name, _] : table_join->columns_from_joined_table)
|
|
|
|
all_source_columns_set.insert(name);
|
|
|
|
}
|
|
|
|
|
2022-07-16 20:23:49 +00:00
|
|
|
if (getContext()->getSettingsRef().enable_positional_arguments)
|
|
|
|
{
|
|
|
|
if (select_query->groupBy())
|
|
|
|
{
|
|
|
|
for (auto & expr : select_query->groupBy()->children)
|
|
|
|
replaceForPositionalArguments(expr, select_query, ASTSelectQuery::Expression::GROUP_BY);
|
|
|
|
}
|
|
|
|
if (select_query->orderBy())
|
|
|
|
{
|
|
|
|
for (auto & expr : select_query->orderBy()->children)
|
|
|
|
replaceForPositionalArguments(expr, select_query, ASTSelectQuery::Expression::ORDER_BY);
|
|
|
|
}
|
|
|
|
if (select_query->limitBy())
|
|
|
|
{
|
|
|
|
for (auto & expr : select_query->limitBy()->children)
|
|
|
|
replaceForPositionalArguments(expr, select_query, ASTSelectQuery::Expression::LIMIT_BY);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-11 11:54:25 +00:00
|
|
|
normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext());
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
/// Remove unneeded columns according to 'required_result_columns'.
|
|
|
|
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
|
|
|
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
|
|
|
|
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
Revert "Fix converting types for UNION queries (may produce LOGICAL_ERROR)"
This fix is incorrect, and it introduce new issues, in particular it
may breaks UNION queries w/o column aliases, i.e.:
SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a
CI: https://s3.amazonaws.com/clickhouse-test-reports/37796/e637489f81768df582fe7389e57f7ed12893087c/fuzzer_astfuzzerdebug,actions//report.html
Reverts: #37593/#34775 (2613149f6bf4f242bbbf2c3c8539b5176fd77286)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-06-04 13:13:01 +00:00
|
|
|
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
|
|
|
/// Executing scalar subqueries - replacing them with constant values.
|
2022-01-17 18:32:55 +00:00
|
|
|
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2021-08-03 18:03:24 +00:00
|
|
|
if (settings.legacy_column_name_of_tuple_literal)
|
|
|
|
markTupleLiteralsAsLegacy(query);
|
|
|
|
|
2021-09-23 16:23:17 +00:00
|
|
|
/// Push the predicate expression down to subqueries. The optimization should be applied to both initial and secondary queries.
|
|
|
|
result.rewrite_subqueries = PredicateExpressionsOptimizer(getContext(), tables_with_columns, settings).optimize(*select_query);
|
|
|
|
|
2021-12-17 12:31:30 +00:00
|
|
|
TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
|
|
|
|
2021-09-23 16:23:17 +00:00
|
|
|
/// Only apply AST optimization for initial queries.
|
2021-12-17 17:36:37 +00:00
|
|
|
if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && !select_options.ignore_ast_optimizations)
|
2021-09-23 16:23:17 +00:00
|
|
|
TreeOptimizer::apply(query, result, tables_with_columns, getContext());
|
2020-01-04 04:31:45 +00:00
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// array_join_alias_to_name, array_join_result_to_source.
|
|
|
|
getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set);
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2021-05-21 17:01:21 +00:00
|
|
|
setJoinStrictness(
|
|
|
|
*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join);
|
|
|
|
|
2021-09-14 11:13:07 +00:00
|
|
|
auto * table_join_ast = select_query->join() ? select_query->join()->table_join->as<ASTTableJoin>() : nullptr;
|
|
|
|
if (table_join_ast && tables_with_columns.size() >= 2)
|
2021-11-08 12:44:13 +00:00
|
|
|
collectJoinedColumns(*result.analyzed_join, *table_join_ast, tables_with_columns, result.aliases, getContext());
|
2019-12-27 19:45:41 +00:00
|
|
|
|
2021-06-17 15:00:03 +00:00
|
|
|
result.aggregates = getAggregates(query, *select_query);
|
|
|
|
result.window_function_asts = getWindowFunctions(query, *select_query);
|
2022-06-16 13:29:56 +00:00
|
|
|
result.expressions_with_window_function = getExpressionsWithWindowFunctions(query);
|
2022-07-28 12:24:16 +00:00
|
|
|
result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key);
|
2021-06-17 15:00:03 +00:00
|
|
|
result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames();
|
|
|
|
|
2021-01-12 13:54:11 +00:00
|
|
|
/// rewrite filters for select query, must go after getArrayJoinedColumns
|
2021-08-16 14:44:41 +00:00
|
|
|
bool is_initiator = getContext()->getClientInfo().distributed_depth == 0;
|
2021-08-20 13:33:30 +00:00
|
|
|
if (settings.optimize_respect_aliases && result.storage_snapshot && is_initiator)
|
2020-12-12 16:42:15 +00:00
|
|
|
{
|
2021-09-14 11:13:07 +00:00
|
|
|
std::unordered_set<IAST *> excluded_nodes;
|
|
|
|
{
|
|
|
|
/// Do not replace ALIASed columns in JOIN ON/USING sections
|
|
|
|
if (table_join_ast && table_join_ast->on_expression)
|
|
|
|
excluded_nodes.insert(table_join_ast->on_expression.get());
|
|
|
|
if (table_join_ast && table_join_ast->using_expression_list)
|
|
|
|
excluded_nodes.insert(table_join_ast->using_expression_list.get());
|
|
|
|
}
|
|
|
|
|
2021-11-09 12:36:25 +00:00
|
|
|
bool is_changed = replaceAliasColumnsInQuery(query, result.storage_snapshot->metadata->getColumns(),
|
2021-09-14 11:13:07 +00:00
|
|
|
result.array_join_result_to_source, getContext(), excluded_nodes);
|
2021-07-18 15:27:19 +00:00
|
|
|
/// If query is changed, we need to redo some work to correct name resolution.
|
2021-09-14 11:13:07 +00:00
|
|
|
if (is_changed)
|
2021-07-18 15:27:19 +00:00
|
|
|
{
|
|
|
|
result.aggregates = getAggregates(query, *select_query);
|
|
|
|
result.window_function_asts = getWindowFunctions(query, *select_query);
|
2022-06-16 13:29:56 +00:00
|
|
|
result.expressions_with_window_function = getExpressionsWithWindowFunctions(query);
|
2022-07-28 12:24:16 +00:00
|
|
|
result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key);
|
2021-07-18 15:27:19 +00:00
|
|
|
}
|
2020-12-12 16:42:15 +00:00
|
|
|
}
|
|
|
|
|
2022-01-10 18:21:24 +00:00
|
|
|
/// Rewrite _shard_num to shardNum()
|
|
|
|
if (result.has_virtual_shard_num)
|
|
|
|
{
|
|
|
|
RewriteShardNumVisitor::Data data_rewrite_shard_num;
|
|
|
|
RewriteShardNumVisitor(data_rewrite_shard_num).visit(query);
|
|
|
|
}
|
|
|
|
|
2020-06-25 20:59:10 +00:00
|
|
|
result.ast_join = select_query->join();
|
2020-06-04 22:01:40 +00:00
|
|
|
|
|
|
|
if (result.optimize_trivial_count)
|
|
|
|
result.optimize_trivial_count = settings.optimize_trivial_count_query &&
|
2020-09-21 10:13:01 +00:00
|
|
|
!select_query->groupBy() && !select_query->having() &&
|
2020-06-04 22:01:40 +00:00
|
|
|
!select_query->sampleSize() && !select_query->sampleOffset() && !select_query->final() &&
|
2020-06-05 21:17:00 +00:00
|
|
|
(tables_with_columns.size() < 2 || isLeft(result.analyzed_join->kind()));
|
2020-06-04 22:01:40 +00:00
|
|
|
|
2022-07-05 15:31:46 +00:00
|
|
|
// remove outer braces in order by
|
|
|
|
RewriteOrderByVisitor::Data data;
|
|
|
|
RewriteOrderByVisitor(data).visit(query);
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
return std::make_shared<const TreeRewriterResult>(result);
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Analyzes an expression AST that is not a SELECT query and returns the collected result.
///
/// The AST is rewritten in place: it is normalized, scalar subqueries are processed,
/// tuple literals are optionally marked as legacy, and `if` chains are optimized.
/// After that, aggregate functions are either collected into the result
/// (when `allow_aggregations` is set) or rejected, and the used columns are collected.
///
/// `source_columns`, `storage` and `storage_snapshot` are forwarded to the TreeRewriterResult constructor.
/// `allow_self_aliases` is forwarded to `normalize`.
/// `execute_scalar_subqueries` is passed negated to `executeScalarSubqueries`.
///
/// Throws an Exception with LOGICAL_ERROR if `query` is an ASTSelectQuery.
TreeRewriterResultPtr TreeRewriter::analyze(
    ASTPtr & query,
    const NamesAndTypesList & source_columns,
    ConstStoragePtr storage,
    const StorageSnapshotPtr & storage_snapshot,
    bool allow_aggregations,
    bool allow_self_aliases,
    bool execute_scalar_subqueries) const
{
    /// SELECT queries are not accepted by this entry point.
    if (query->as<ASTSelectQuery>() != nullptr)
        throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR);

    const auto context = getContext();
    const auto & settings = context->getSettingsRef();

    TreeRewriterResult analysis(source_columns, storage, storage_snapshot, false);

    normalize(
        query,
        analysis.aliases,
        analysis.source_columns_set,
        /* ignore_alias = */ false,
        settings,
        allow_self_aliases,
        context);

    /// Executing scalar subqueries. Column defaults could be a scalar subquery.
    const bool skip_subquery_execution = !execute_scalar_subqueries;
    executeScalarSubqueries(query, context, 0, analysis.scalars, analysis.local_scalars, skip_subquery_execution);

    if (settings.legacy_column_name_of_tuple_literal)
    {
        markTupleLiteralsAsLegacy(query);
    }

    TreeOptimizer::optimizeIf(query, analysis.aliases, settings.optimize_if_chain_to_multiif);

    if (!allow_aggregations)
    {
        /// Aggregate functions are not allowed anywhere in the expression.
        assertNoAggregates(query, "in wrong place");
    }
    else
    {
        GetAggregatesVisitor::Data aggregates_data;
        GetAggregatesVisitor(aggregates_data).visit(query);

        /// An aggregate function must not contain another aggregate function among its arguments.
        for (const ASTFunction * aggregate : aggregates_data.aggregates)
        {
            for (auto & argument : aggregate->arguments->children)
                assertNoAggregates(argument, "inside another aggregate function");
        }

        analysis.aggregates = aggregates_data.aggregates;
    }

    analysis.collectUsedColumns(query, false, settings.query_plan_optimize_primary_key);

    return std::make_shared<const TreeRewriterResult>(analysis);
}
|
|
|
|
|
2021-02-10 14:12:49 +00:00
|
|
|
void TreeRewriter::normalize(
|
2022-05-24 02:58:45 +00:00
|
|
|
ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_)
|
2020-02-26 19:33:09 +00:00
|
|
|
{
|
2022-08-10 15:54:56 +00:00
|
|
|
if (!UserDefinedSQLFunctionFactory::instance().empty())
|
|
|
|
{
|
|
|
|
UserDefinedSQLFunctionVisitor::Data data_user_defined_functions_visitor;
|
|
|
|
UserDefinedSQLFunctionVisitor(data_user_defined_functions_visitor).visit(query);
|
|
|
|
}
|
2021-08-18 21:54:55 +00:00
|
|
|
|
2020-06-22 14:55:49 +00:00
|
|
|
CustomizeCountDistinctVisitor::Data data_count_distinct{settings.count_distinct_implementation};
|
|
|
|
CustomizeCountDistinctVisitor(data_count_distinct).visit(query);
|
|
|
|
|
|
|
|
CustomizeCountIfDistinctVisitor::Data data_count_if_distinct{settings.count_distinct_implementation.toString() + "If"};
|
|
|
|
CustomizeCountIfDistinctVisitor(data_count_if_distinct).visit(query);
|
|
|
|
|
|
|
|
CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"};
|
|
|
|
CustomizeIfDistinctVisitor(data_distinct_if).visit(query);
|
2020-02-26 19:33:09 +00:00
|
|
|
|
2021-10-04 13:08:41 +00:00
|
|
|
ExistsExpressionVisitor::Data exists;
|
|
|
|
ExistsExpressionVisitor(exists).visit(query);
|
|
|
|
|
2020-04-06 13:30:16 +00:00
|
|
|
if (settings.transform_null_in)
|
|
|
|
{
|
|
|
|
CustomizeInVisitor::Data data_null_in{"nullIn"};
|
|
|
|
CustomizeInVisitor(data_null_in).visit(query);
|
|
|
|
|
|
|
|
CustomizeNotInVisitor::Data data_not_null_in{"notNullIn"};
|
|
|
|
CustomizeNotInVisitor(data_not_null_in).visit(query);
|
|
|
|
|
|
|
|
CustomizeGlobalInVisitor::Data data_global_null_in{"globalNullIn"};
|
|
|
|
CustomizeGlobalInVisitor(data_global_null_in).visit(query);
|
|
|
|
|
|
|
|
CustomizeGlobalNotInVisitor::Data data_global_not_null_in{"globalNotNullIn"};
|
|
|
|
CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query);
|
|
|
|
}
|
|
|
|
|
2021-04-15 16:40:49 +00:00
|
|
|
// Try to fuse sum/avg/count with identical arguments to one sumCount call,
|
|
|
|
// if we have at least two different functions. E.g. we will replace sum(x)
|
|
|
|
// and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will
|
|
|
|
// be calculated only once because of CSE.
|
2021-11-03 09:23:33 +00:00
|
|
|
if (settings.optimize_fuse_sum_count_avg && settings.optimize_syntax_fuse_functions)
|
2021-03-01 10:04:34 +00:00
|
|
|
{
|
2021-04-15 16:40:49 +00:00
|
|
|
FuseSumCountAggregatesVisitor::Data data;
|
|
|
|
FuseSumCountAggregatesVisitor(data).visit(query);
|
|
|
|
fuseSumCountAggregates(data.fuse_map);
|
2021-03-01 10:04:34 +00:00
|
|
|
}
|
|
|
|
|
2021-03-08 03:58:18 +00:00
|
|
|
/// Rewrite all aggregate functions to add -OrNull suffix to them
|
2020-10-18 12:18:31 +00:00
|
|
|
if (settings.aggregate_functions_null_for_empty)
|
|
|
|
{
|
|
|
|
CustomizeAggregateFunctionsOrNullVisitor::Data data_or_null{"OrNull"};
|
|
|
|
CustomizeAggregateFunctionsOrNullVisitor(data_or_null).visit(query);
|
|
|
|
}
|
|
|
|
|
2020-12-09 04:30:38 +00:00
|
|
|
/// Move -OrNull suffix ahead, this should execute after add -OrNull suffix
|
|
|
|
CustomizeAggregateFunctionsMoveOrNullVisitor::Data data_or_null{"OrNull"};
|
|
|
|
CustomizeAggregateFunctionsMoveOrNullVisitor(data_or_null).visit(query);
|
|
|
|
|
2020-02-26 19:33:09 +00:00
|
|
|
/// Creates a dictionary `aliases`: alias -> ASTPtr
|
2020-03-18 21:38:27 +00:00
|
|
|
QueryAliasesVisitor(aliases).visit(query);
|
2020-02-26 19:33:09 +00:00
|
|
|
|
|
|
|
/// Mark table ASTIdentifiers with not a column marker
|
|
|
|
MarkTableIdentifiersVisitor::Data identifiers_data{aliases};
|
|
|
|
MarkTableIdentifiersVisitor(identifiers_data).visit(query);
|
|
|
|
|
2021-02-14 11:09:36 +00:00
|
|
|
/// Rewrite function names to their canonical ones.
|
2022-05-10 04:06:50 +00:00
|
|
|
/// Notice: function name normalization is disabled when it's a secondary query, because queries are either
|
|
|
|
/// already normalized on initiator node, or not normalized and should remain unnormalized for
|
|
|
|
/// compatibility.
|
2022-05-24 02:58:45 +00:00
|
|
|
if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names)
|
2021-02-18 03:27:24 +00:00
|
|
|
FunctionNameNormalizer().visit(query.get());
|
2021-02-14 11:09:36 +00:00
|
|
|
|
2020-02-26 19:33:09 +00:00
|
|
|
/// Common subexpression elimination. Rewrite rules.
|
2021-06-07 20:59:38 +00:00
|
|
|
QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases);
|
2020-02-26 19:33:09 +00:00
|
|
|
QueryNormalizer(normalizer_data).visit(query);
|
2022-05-26 07:28:55 +00:00
|
|
|
|
|
|
|
optimizeGroupingSets(query);
|
2020-02-26 19:33:09 +00:00
|
|
|
}
|
|
|
|
|
2019-12-27 19:45:41 +00:00
|
|
|
}
|