ClickHouse/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp

247 lines
9.0 KiB
C++
Raw Normal View History

#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
2019-10-19 20:36:35 +00:00
#include <DataStreams/materializeBlock.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTWithElement.h>
2021-07-19 12:35:55 +00:00
#include <Parsers/queryToString.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
2021-01-13 18:58:00 +00:00
2021-07-17 18:06:46 +00:00
namespace DB
{
namespace ErrorCodes
{
/// Declared here, defined elsewhere. Thrown in this file when a scalar subquery returns
/// more than one row, or returns no rows while its result type cannot be made Nullable.
extern const int INCORRECT_RESULT_OF_SCALAR_SUBQUERY;
}
2019-01-09 16:16:59 +00:00
/// Tells the in-depth visitor whether it should descend from `node` into `child`.
/// Returns false for every child of a pruned node kind, and for table expressions /
/// nested selects directly under a SELECT query; returns true otherwise.
bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
    /// Node kinds below which the generic traversal never continues:
    ///  - ASTSubquery / ASTFunction: processed by the visit() overloads of this matcher;
    ///  - ASTTableExpression: don't descend into subqueries in FROM section;
    ///  - ASTWithElement: do not go to subqueries defined in WITH statement.
    const bool traversal_stops_here = node->as<ASTSubquery>() || node->as<ASTFunction>()
        || node->as<ASTTableExpression>() || node->as<ASTWithElement>();

    if (traversal_stops_here)
        return false;

    /// Under a SELECT query, do not go to FROM, JOIN, UNION.
    if (node->as<ASTSelectQuery>())
    {
        const bool child_is_table_or_select = child->as<ASTTableExpression>() || child->as<ASTSelectQuery>();
        return !child_is_table_or_select;
    }

    return true;
}
2019-02-22 13:33:56 +00:00
/// Entry point of the matcher: dispatches `ast` to the overload matching its node type.
/// Nodes that are neither ASTSubquery nor ASTFunction are left untouched here.
void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data)
{
    if (const auto * subquery_node = ast->as<ASTSubquery>())
    {
        visit(*subquery_node, ast, data);
    }

    /// Deliberately not `else if`: the subquery overload may overwrite `ast`,
    /// and this check then inspects whatever node `ast` points to now.
    if (const auto * function_node = ast->as<ASTFunction>())
    {
        visit(*function_node, ast, data);
    }
}
2019-10-19 20:36:35 +00:00
/// Converting to literal values might take a fair amount of overhead when the value is large, (e.g.
/// Array, BitMap, etc.), This conversion is required for constant folding, index lookup, branch
/// elimination. However, these optimizations should never be related to large values, thus we
/// blacklist them here.
static bool worthConvertingToLiteral(const Block & scalar)
{
2020-04-22 06:01:33 +00:00
const auto * scalar_type_name = scalar.safeGetByPosition(0).type->getFamilyName();
2019-10-19 20:36:35 +00:00
std::set<String> useless_literal_types = {"Array", "Tuple", "AggregateFunction", "Function", "Set", "LowCardinality"};
return !useless_literal_types.count(scalar_type_name);
}
void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data)
{
2019-10-19 20:36:35 +00:00
auto hash = subquery.getTreeHash();
auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second);
Block scalar;
if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str))
scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str);
2019-10-19 20:36:35 +00:00
else if (data.scalars.count(scalar_query_hash_str))
scalar = data.scalars[scalar_query_hash_str];
else
{
auto subquery_context = Context::createCopy(data.getContext());
Settings subquery_settings = data.getContext()->getSettings();
2019-10-19 20:36:35 +00:00
subquery_settings.max_result_rows = 1;
2020-03-08 23:48:08 +00:00
subquery_settings.extremes = false;
subquery_context->setSettings(subquery_settings);
2019-10-19 20:36:35 +00:00
ASTPtr subquery_select = subquery.children.at(0);
2021-02-09 03:46:06 +00:00
auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true);
options.analyze(data.only_analyze);
auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options);
2019-10-19 20:36:35 +00:00
Block block;
if (data.only_analyze)
{
/// If query is only analyzed, then constants are not correct.
block = interpreter.getSampleBlock();
for (auto & column : block)
2020-05-08 11:19:27 +00:00
{
if (column.column->empty())
2020-05-08 11:19:27 +00:00
{
auto mut_col = column.column->cloneEmpty();
mut_col->insertDefault();
column.column = std::move(mut_col);
}
}
}
else
{
2021-01-13 18:58:00 +00:00
auto io = interpreter.execute();
2019-10-19 20:36:35 +00:00
PullingAsyncPipelineExecutor executor(io.pipeline);
while (block.rows() == 0 && executor.pull(block));
if (block.rows() == 0)
{
auto types = interpreter.getSampleBlock().getDataTypes();
if (types.size() != 1)
types = {std::make_shared<DataTypeTuple>(types)};
auto & type = types[0];
if (!type->isNullable())
{
if (!type->canBeInsideNullable())
throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
"Scalar subquery returned empty result of type {} which cannot be Nullable",
type->getName());
type = makeNullable(type);
}
2021-07-19 12:35:55 +00:00
ASTPtr ast_new = std::make_shared<ASTLiteral>(Null());
ast_new = addTypeConversionToAST(std::move(ast_new), type->getName());
ast_new->setAlias(ast->tryGetAlias());
ast = std::move(ast_new);
return;
}
if (block.rows() != 1)
throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY);
Block tmp_block;
while (tmp_block.rows() == 0 && executor.pull(tmp_block));
if (tmp_block.rows() != 0)
throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY);
}
2019-10-19 20:36:35 +00:00
block = materializeBlock(block);
size_t columns = block.columns();
if (columns == 1)
{
auto & column = block.getByPosition(0);
/// Here we wrap type to nullable if we can.
/// It is needed cause if subquery return no rows, it's result will be Null.
/// In case of many columns, do not check it cause tuple can't be nullable.
if (!column.type->isNullable() && column.type->canBeInsideNullable())
{
column.type = makeNullable(column.type);
column.column = makeNullable(column.column);
}
2019-10-19 20:36:35 +00:00
scalar = block;
}
else
2019-10-19 20:36:35 +00:00
{
ColumnWithTypeAndName ctn;
ctn.type = std::make_shared<DataTypeTuple>(block.getDataTypes());
ctn.column = ColumnTuple::create(block.getColumns());
scalar.insert(ctn);
}
}
const Settings & settings = data.getContext()->getSettingsRef();
2019-10-19 20:36:35 +00:00
// Always convert to literals when there is no query context.
if (data.only_analyze || !settings.enable_scalar_subquery_optimization || worthConvertingToLiteral(scalar)
|| !data.getContext()->hasQueryContext())
{
/// subquery and ast can be the same object and ast will be moved.
/// Save these fields to avoid use after move.
2021-07-16 14:17:22 +00:00
auto alias = subquery.alias;
auto prefer_alias_to_column_name = subquery.prefer_alias_to_column_name;
2019-10-19 20:36:35 +00:00
auto lit = std::make_unique<ASTLiteral>((*scalar.safeGetByPosition(0).column)[0]);
2021-07-16 14:17:22 +00:00
lit->alias = alias;
lit->prefer_alias_to_column_name = prefer_alias_to_column_name;
2019-10-19 20:36:35 +00:00
ast = addTypeConversionToAST(std::move(lit), scalar.safeGetByPosition(0).type->getName());
/// If only analyze was requested the expression is not suitable for constant folding, disable it.
if (data.only_analyze)
{
ast->as<ASTFunction>()->alias.clear();
auto func = makeASTFunction("identity", std::move(ast));
2021-07-16 14:17:22 +00:00
func->alias = alias;
func->prefer_alias_to_column_name = prefer_alias_to_column_name;
ast = std::move(func);
}
}
else
{
2019-10-19 20:36:35 +00:00
auto func = makeASTFunction("__getScalar", std::make_shared<ASTLiteral>(scalar_query_hash_str));
func->alias = subquery.alias;
func->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name;
ast = std::move(func);
}
2019-10-19 20:36:35 +00:00
data.scalars[scalar_query_hash_str] = std::move(scalar);
}
2019-02-22 13:33:56 +00:00
void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
/// Don't descend into subqueries in arguments of IN operator.
2021-03-04 05:59:57 +00:00
/// But if an argument is not subquery, then deeper may be scalar subqueries and we need to descend in them.
std::vector<ASTPtr *> out;
if (checkFunctionIsInOrGlobalInOperator(func))
{
for (auto & child : ast->children)
{
if (child != func.arguments)
out.push_back(&child);
else
for (size_t i = 0, size = func.arguments->children.size(); i < size; ++i)
2019-03-11 13:22:51 +00:00
if (i != 1 || !func.arguments->children[i]->as<ASTSubquery>())
out.push_back(&func.arguments->children[i]);
}
}
else
for (auto & child : ast->children)
out.push_back(&child);
2019-02-22 13:33:56 +00:00
for (ASTPtr * add_node : out)
2019-02-22 15:45:47 +00:00
Visitor(data).visit(*add_node);
}
}