2018-10-11 19:28:59 +00:00
|
|
|
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
|
|
|
|
|
2021-07-16 13:38:35 +00:00
|
|
|
#include <Columns/ColumnNullable.h>
|
2022-01-17 18:32:55 +00:00
|
|
|
#include <Columns/ColumnTuple.h>
|
2021-07-16 13:38:35 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
2022-01-17 18:32:55 +00:00
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
2019-12-12 08:57:25 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2021-04-10 23:33:54 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
|
|
|
#include <Interpreters/addTypeConversionToAST.h>
|
|
|
|
#include <Interpreters/misc.h>
|
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
#include <Parsers/ASTWithElement.h>
|
2021-07-19 12:35:55 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
2021-02-16 08:26:24 +00:00
|
|
|
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
|
2022-01-17 18:32:55 +00:00
|
|
|
#include <Common/ProfileEvents.h>
|
2023-05-25 22:54:54 +00:00
|
|
|
#include <Common/FieldVisitorToString.h>
|
|
|
|
#include <IO/WriteBufferFromString.h>
|
2021-01-13 18:58:00 +00:00
|
|
|
|
2022-01-17 18:32:55 +00:00
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
|
|
|
extern const Event ScalarSubqueriesGlobalCacheHit;
|
|
|
|
extern const Event ScalarSubqueriesLocalCacheHit;
|
|
|
|
extern const Event ScalarSubqueriesCacheMiss;
|
|
|
|
}
|
2021-07-17 18:06:46 +00:00
|
|
|
|
2018-10-11 19:28:59 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used in this file; declared `extern`, defined elsewhere.
namespace ErrorCodes
{
    /// Thrown when a scalar subquery yields more than one row, or an empty result
    /// whose type cannot be wrapped into Nullable.
    extern const int INCORRECT_RESULT_OF_SCALAR_SUBQUERY;
}
|
|
|
|
|
|
|
|
|
2019-01-09 16:16:59 +00:00
|
|
|
/// Decides whether the generic tree walker should descend from `node` into `child`.
/// Returns false when the traversal must stop at this edge, true otherwise.
bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
    /// Subqueries and functions are processed by the dedicated visit() overloads,
    /// so the walker must not descend into them on its own.
    const bool handled_by_visit = node->as<ASTSubquery>() || node->as<ASTFunction>();

    /// Subqueries in the FROM section and subqueries defined in a WITH statement are not entered.
    const bool is_opaque_node = node->as<ASTTableExpression>() || node->as<ASTWithElement>();

    if (handled_by_visit || is_opaque_node)
        return false;

    /// Any other node that is not a SELECT is traversed unconditionally.
    if (!node->as<ASTSelectQuery>())
        return true;

    /// Inside a SELECT: do not go to FROM, JOIN, UNION.
    return !child->as<ASTTableExpression>() && !child->as<ASTSelectQuery>();
}
|
|
|
|
|
2019-02-22 13:33:56 +00:00
|
|
|
/// Entry point of the matcher: dispatches `ast` to the overload matching its concrete node type.
void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data)
{
    const auto * as_subquery = ast->as<ASTSubquery>();
    if (as_subquery != nullptr)
        visit(*as_subquery, ast, data);

    /// The two checks are deliberately not exclusive: the subquery overload assigns a new node
    /// to `ast`, so the (possibly replaced) node is inspected again here.
    /// NOTE(review): presumably intentional; do not turn this into `else if` without confirming.
    const auto * as_function = ast->as<ASTFunction>();
    if (as_function != nullptr)
        visit(*as_function, ast, data);
}
|
|
|
|
|
2022-01-19 17:26:37 +00:00
|
|
|
static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqueriesMatcher::Data & data)
|
|
|
|
{
|
|
|
|
auto subquery_context = Context::createCopy(data.getContext());
|
|
|
|
Settings subquery_settings = data.getContext()->getSettings();
|
|
|
|
subquery_settings.max_result_rows = 1;
|
|
|
|
subquery_settings.extremes = false;
|
|
|
|
subquery_context->setSettings(subquery_settings);
|
2022-01-24 17:51:11 +00:00
|
|
|
if (!data.only_analyze && subquery_context->hasQueryContext())
|
2022-01-19 17:26:37 +00:00
|
|
|
{
|
|
|
|
/// Save current cached scalars in the context before analyzing the query
|
|
|
|
/// This is specially helpful when analyzing CTE scalars
|
|
|
|
auto context = subquery_context->getQueryContext();
|
|
|
|
for (const auto & it : data.scalars)
|
|
|
|
context->addScalar(it.first, it.second);
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTPtr subquery_select = subquery.children.at(0);
|
|
|
|
|
|
|
|
auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true);
|
2023-03-27 14:30:53 +00:00
|
|
|
options.is_create_parameterized_view = data.is_create_parameterized_view;
|
2022-01-19 17:26:37 +00:00
|
|
|
options.analyze(data.only_analyze);
|
|
|
|
|
|
|
|
return std::make_unique<InterpreterSelectWithUnionQuery>(subquery_select, subquery_context, options);
|
|
|
|
}
|
|
|
|
|
2018-12-10 13:02:45 +00:00
|
|
|
void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data)
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
2019-10-19 20:36:35 +00:00
|
|
|
auto hash = subquery.getTreeHash();
|
|
|
|
auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second);
|
|
|
|
|
2022-01-19 17:26:37 +00:00
|
|
|
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter = nullptr;
|
|
|
|
bool hit = false;
|
2022-01-17 18:32:55 +00:00
|
|
|
bool is_local = false;
|
2022-01-19 17:26:37 +00:00
|
|
|
|
2019-10-19 20:36:35 +00:00
|
|
|
Block scalar;
|
2022-04-06 06:53:10 +00:00
|
|
|
if (data.only_analyze)
|
|
|
|
{
|
|
|
|
/// Don't use scalar cache during query analysis
|
|
|
|
}
|
|
|
|
else if (data.local_scalars.contains(scalar_query_hash_str))
|
2022-01-17 18:32:55 +00:00
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
hit = true;
|
2022-01-17 18:32:55 +00:00
|
|
|
scalar = data.local_scalars[scalar_query_hash_str];
|
|
|
|
is_local = true;
|
|
|
|
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit);
|
2021-07-24 01:25:00 +00:00
|
|
|
}
|
2022-04-06 06:53:10 +00:00
|
|
|
else if (data.scalars.contains(scalar_query_hash_str))
|
2021-07-24 01:25:00 +00:00
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
hit = true;
|
2019-10-19 20:36:35 +00:00
|
|
|
scalar = data.scalars[scalar_query_hash_str];
|
2022-01-17 18:32:55 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
|
2021-07-24 01:25:00 +00:00
|
|
|
}
|
2019-10-19 20:36:35 +00:00
|
|
|
else
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str))
|
2022-01-17 18:32:55 +00:00
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
if (!data.getContext()->getViewSource())
|
|
|
|
{
|
|
|
|
/// We aren't using storage views so we can safely use the context cache
|
|
|
|
scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str);
|
|
|
|
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
|
|
|
|
hit = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/// If we are under a context that uses views that means that the cache might contain values that reference
|
|
|
|
/// the original table and not the view, so in order to be able to check the global cache we need to first
|
|
|
|
/// make sure that the query doesn't use the view
|
|
|
|
/// Note in any case the scalar will end up cached in *data* so this won't be repeated inside this context
|
|
|
|
interpreter = getQueryInterpreter(subquery, data);
|
|
|
|
if (!interpreter->usesViewSource())
|
|
|
|
{
|
|
|
|
scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str);
|
|
|
|
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
|
|
|
|
hit = true;
|
|
|
|
}
|
|
|
|
}
|
2022-01-17 18:32:55 +00:00
|
|
|
}
|
2022-01-19 17:26:37 +00:00
|
|
|
}
|
2019-10-19 20:36:35 +00:00
|
|
|
|
2022-01-19 17:26:37 +00:00
|
|
|
if (!hit)
|
|
|
|
{
|
|
|
|
if (!interpreter)
|
|
|
|
interpreter = getQueryInterpreter(subquery, data);
|
2019-10-19 20:36:35 +00:00
|
|
|
|
2022-01-19 17:26:37 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss);
|
|
|
|
is_local = interpreter->usesViewSource();
|
2020-04-30 17:07:34 +00:00
|
|
|
|
2019-10-19 20:36:35 +00:00
|
|
|
Block block;
|
2020-04-30 17:07:34 +00:00
|
|
|
|
|
|
|
if (data.only_analyze)
|
|
|
|
{
|
|
|
|
/// If query is only analyzed, then constants are not correct.
|
2022-01-19 17:26:37 +00:00
|
|
|
block = interpreter->getSampleBlock();
|
2020-04-30 17:07:34 +00:00
|
|
|
for (auto & column : block)
|
2020-05-08 11:19:27 +00:00
|
|
|
{
|
2020-04-30 17:07:34 +00:00
|
|
|
if (column.column->empty())
|
2020-05-08 11:19:27 +00:00
|
|
|
{
|
|
|
|
auto mut_col = column.column->cloneEmpty();
|
|
|
|
mut_col->insertDefault();
|
|
|
|
column.column = std::move(mut_col);
|
|
|
|
}
|
|
|
|
}
|
2020-04-30 17:07:34 +00:00
|
|
|
}
|
|
|
|
else
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
auto io = interpreter->execute();
|
2019-10-19 20:36:35 +00:00
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
PullingAsyncPipelineExecutor executor(io.pipeline);
|
2022-05-27 20:47:35 +00:00
|
|
|
io.pipeline.setProgressCallback(data.getContext()->getProgressCallback());
|
2021-05-26 15:44:52 +00:00
|
|
|
while (block.rows() == 0 && executor.pull(block));
|
2021-02-16 08:26:24 +00:00
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
if (block.rows() == 0)
|
|
|
|
{
|
2022-01-19 17:26:37 +00:00
|
|
|
auto types = interpreter->getSampleBlock().getDataTypes();
|
2021-07-16 13:38:35 +00:00
|
|
|
if (types.size() != 1)
|
|
|
|
types = {std::make_shared<DataTypeTuple>(types)};
|
|
|
|
|
|
|
|
auto & type = types[0];
|
|
|
|
if (!type->isNullable())
|
|
|
|
{
|
|
|
|
if (!type->canBeInsideNullable())
|
|
|
|
throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
|
|
|
|
"Scalar subquery returned empty result of type {} which cannot be Nullable",
|
|
|
|
type->getName());
|
|
|
|
|
|
|
|
type = makeNullable(type);
|
|
|
|
}
|
|
|
|
|
2021-07-19 12:35:55 +00:00
|
|
|
ASTPtr ast_new = std::make_shared<ASTLiteral>(Null());
|
|
|
|
ast_new = addTypeConversionToAST(std::move(ast_new), type->getName());
|
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
ast_new->setAlias(ast->tryGetAlias());
|
|
|
|
ast = std::move(ast_new);
|
|
|
|
return;
|
|
|
|
}
|
2020-04-30 17:07:34 +00:00
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
if (block.rows() != 1)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
|
2021-02-16 08:26:24 +00:00
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
Block tmp_block;
|
2021-07-24 01:25:00 +00:00
|
|
|
while (tmp_block.rows() == 0 && executor.pull(tmp_block))
|
|
|
|
;
|
2021-02-16 08:26:24 +00:00
|
|
|
|
2021-05-26 15:44:52 +00:00
|
|
|
if (tmp_block.rows() != 0)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
|
|
|
|
2019-10-19 20:36:35 +00:00
|
|
|
block = materializeBlock(block);
|
|
|
|
size_t columns = block.columns();
|
|
|
|
|
|
|
|
if (columns == 1)
|
2021-07-16 13:38:35 +00:00
|
|
|
{
|
|
|
|
auto & column = block.getByPosition(0);
|
|
|
|
/// Here we wrap type to nullable if we can.
|
|
|
|
/// It is needed cause if subquery return no rows, it's result will be Null.
|
|
|
|
/// In case of many columns, do not check it cause tuple can't be nullable.
|
|
|
|
if (!column.type->isNullable() && column.type->canBeInsideNullable())
|
|
|
|
{
|
|
|
|
column.type = makeNullable(column.type);
|
|
|
|
column.column = makeNullable(column.column);
|
|
|
|
}
|
2019-10-19 20:36:35 +00:00
|
|
|
scalar = block;
|
2021-07-16 13:38:35 +00:00
|
|
|
}
|
2018-10-11 19:28:59 +00:00
|
|
|
else
|
2019-10-19 20:36:35 +00:00
|
|
|
{
|
2021-07-24 01:25:00 +00:00
|
|
|
scalar.insert({
|
|
|
|
ColumnTuple::create(block.getColumns()),
|
|
|
|
std::make_shared<DataTypeTuple>(block.getDataTypes()),
|
|
|
|
"tuple"});
|
2019-10-19 20:36:35 +00:00
|
|
|
}
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
const Settings & settings = data.getContext()->getSettingsRef();
|
2019-10-19 20:36:35 +00:00
|
|
|
|
|
|
|
// Always convert to literals when there is no query context.
|
2023-05-25 22:54:54 +00:00
|
|
|
if (data.only_analyze
|
|
|
|
|| !settings.enable_scalar_subquery_optimization
|
|
|
|
|| worthConvertingScalarToLiteral(scalar, data.max_literal_size)
|
2021-04-10 23:33:54 +00:00
|
|
|
|| !data.getContext()->hasQueryContext())
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
2021-07-16 22:11:44 +00:00
|
|
|
/// subquery and ast can be the same object and ast will be moved.
|
|
|
|
/// Save these fields to avoid use after move.
|
2021-07-16 14:17:22 +00:00
|
|
|
auto alias = subquery.alias;
|
|
|
|
auto prefer_alias_to_column_name = subquery.prefer_alias_to_column_name;
|
|
|
|
|
2019-10-19 20:36:35 +00:00
|
|
|
auto lit = std::make_unique<ASTLiteral>((*scalar.safeGetByPosition(0).column)[0]);
|
2021-07-16 14:17:22 +00:00
|
|
|
lit->alias = alias;
|
|
|
|
lit->prefer_alias_to_column_name = prefer_alias_to_column_name;
|
2019-10-19 20:36:35 +00:00
|
|
|
ast = addTypeConversionToAST(std::move(lit), scalar.safeGetByPosition(0).type->getName());
|
2020-12-23 19:36:10 +00:00
|
|
|
|
|
|
|
/// If only analyze was requested the expression is not suitable for constant folding, disable it.
|
|
|
|
if (data.only_analyze)
|
|
|
|
{
|
|
|
|
ast->as<ASTFunction>()->alias.clear();
|
|
|
|
auto func = makeASTFunction("identity", std::move(ast));
|
2021-07-16 14:17:22 +00:00
|
|
|
func->alias = alias;
|
|
|
|
func->prefer_alias_to_column_name = prefer_alias_to_column_name;
|
2020-12-23 19:36:10 +00:00
|
|
|
ast = std::move(func);
|
|
|
|
}
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
2023-05-25 22:54:54 +00:00
|
|
|
else if (!data.replace_only_to_literals)
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
2019-10-19 20:36:35 +00:00
|
|
|
auto func = makeASTFunction("__getScalar", std::make_shared<ASTLiteral>(scalar_query_hash_str));
|
|
|
|
func->alias = subquery.alias;
|
|
|
|
func->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name;
|
|
|
|
ast = std::move(func);
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
2019-10-19 20:36:35 +00:00
|
|
|
|
2022-01-17 18:32:55 +00:00
|
|
|
if (is_local)
|
|
|
|
data.local_scalars[scalar_query_hash_str] = std::move(scalar);
|
|
|
|
else
|
|
|
|
data.scalars[scalar_query_hash_str] = std::move(scalar);
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
|
|
|
|
2019-02-22 13:33:56 +00:00
|
|
|
void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
|
|
|
/// Don't descend into subqueries in arguments of IN operator.
|
2021-03-04 05:59:57 +00:00
|
|
|
/// But if an argument is not subquery, then deeper may be scalar subqueries and we need to descend in them.
|
2018-10-11 19:28:59 +00:00
|
|
|
|
2018-12-10 13:02:45 +00:00
|
|
|
std::vector<ASTPtr *> out;
|
2020-07-15 14:22:54 +00:00
|
|
|
if (checkFunctionIsInOrGlobalInOperator(func))
|
2018-10-11 19:28:59 +00:00
|
|
|
{
|
|
|
|
for (auto & child : ast->children)
|
|
|
|
{
|
2018-11-01 17:07:20 +00:00
|
|
|
if (child != func.arguments)
|
2018-12-10 13:02:45 +00:00
|
|
|
out.push_back(&child);
|
2018-10-11 19:28:59 +00:00
|
|
|
else
|
2018-11-01 17:07:20 +00:00
|
|
|
for (size_t i = 0, size = func.arguments->children.size(); i < size; ++i)
|
2019-03-11 13:22:51 +00:00
|
|
|
if (i != 1 || !func.arguments->children[i]->as<ASTSubquery>())
|
2018-12-10 13:02:45 +00:00
|
|
|
out.push_back(&func.arguments->children[i]);
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2018-12-10 13:02:45 +00:00
|
|
|
for (auto & child : ast->children)
|
|
|
|
out.push_back(&child);
|
|
|
|
|
2019-02-22 13:33:56 +00:00
|
|
|
for (ASTPtr * add_node : out)
|
2019-02-22 15:45:47 +00:00
|
|
|
Visitor(data).visit(*add_node);
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|
|
|
|
|
2023-05-25 22:54:54 +00:00
|
|
|
static size_t getSizeOfSerializedLiteral(const Field & field)
|
|
|
|
{
|
|
|
|
auto field_str = applyVisitor(FieldVisitorToString(), field);
|
|
|
|
return field_str.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool worthConvertingScalarToLiteral(const Block & scalar, std::optional<size_t> max_literal_size)
|
|
|
|
{
|
|
|
|
/// Converting to literal values might take a fair amount of overhead when the value is large, (e.g.
|
|
|
|
/// Array, BitMap, etc.), This conversion is required for constant folding, index lookup, branch
|
|
|
|
/// elimination. However, these optimizations should never be related to large values, thus we blacklist them here.
|
|
|
|
const auto * scalar_type_name = scalar.safeGetByPosition(0).type->getFamilyName();
|
|
|
|
static const std::set<std::string_view> maybe_large_literal_types = {"Array", "Tuple", "AggregateFunction", "Function", "Set", "LowCardinality"};
|
|
|
|
|
|
|
|
if (!maybe_large_literal_types.contains(scalar_type_name))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (!max_literal_size)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/// Size of serialized literal cannot be less than size in bytes.
|
|
|
|
if (scalar.bytes() > *max_literal_size)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return getSizeOfSerializedLiteral((*scalar.safeGetByPosition(0).column)[0]) <= *max_literal_size;
|
|
|
|
}
|
|
|
|
|
2018-10-11 19:28:59 +00:00
|
|
|
}
|