2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
|
|
|
|
#include <Interpreters/Context.h>
|
2018-10-30 16:31:21 +00:00
|
|
|
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
2019-01-25 15:42:24 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
2019-04-11 19:29:28 +00:00
|
|
|
#include <Interpreters/InDepthNodeVisitor.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/StorageDistributed.h>
|
2019-01-15 12:28:17 +00:00
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2017-01-04 02:37:47 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-07-22 15:37:59 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED;
|
|
|
|
extern const int LOGICAL_ERROR;
|
2017-01-04 02:37:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
/// Looks up the storage behind a table expression.
/// The expression is first resolved to a storage ID through `context`; the result of
/// DatabaseCatalog::tryGetTable for that ID is returned as is.
StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context)
{
    const auto storage_id = context.resolveStorageID(database_and_table);
    auto & catalog = DatabaseCatalog::instance();
    return catalog.tryGetTable(storage_id, context);
}
|
|
|
|
|
|
|
|
using CheckShardsAndTables = InJoinSubqueriesPreprocessor::CheckShardsAndTables;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
struct NonGlobalTableData
|
|
|
|
{
|
|
|
|
using TypeToVisit = ASTTableExpression;
|
|
|
|
|
|
|
|
const CheckShardsAndTables & checker;
|
|
|
|
const Context & context;
|
2020-03-27 20:12:14 +00:00
|
|
|
std::vector<ASTPtr> & renamed_tables;
|
2019-04-11 19:29:28 +00:00
|
|
|
ASTFunction * function = nullptr;
|
|
|
|
ASTTableJoin * table_join = nullptr;
|
|
|
|
|
|
|
|
void visit(ASTTableExpression & node, ASTPtr &)
|
|
|
|
{
|
|
|
|
ASTPtr & database_and_table = node.database_and_table_name;
|
|
|
|
if (database_and_table)
|
|
|
|
renameIfNeeded(database_and_table);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-04-11 19:29:28 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
void renameIfNeeded(ASTPtr & database_and_table)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2020-07-16 22:08:44 +00:00
|
|
|
const DistributedProductMode distributed_product_mode = context.getSettingsRef().distributed_product_mode;
|
2019-04-11 19:29:28 +00:00
|
|
|
|
|
|
|
StoragePtr storage = tryGetTable(database_and_table, context);
|
|
|
|
if (!storage || !checker.hasAtLeastTwoShards(*storage))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (distributed_product_mode == DistributedProductMode::DENY)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-04-11 19:29:28 +00:00
|
|
|
throw Exception("Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny')."
|
|
|
|
" You may rewrite query to use local tables in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value.",
|
|
|
|
ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED);
|
|
|
|
}
|
|
|
|
else if (distributed_product_mode == DistributedProductMode::GLOBAL)
|
|
|
|
{
|
|
|
|
if (function)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-04-11 19:29:28 +00:00
|
|
|
auto * concrete = function->as<ASTFunction>();
|
|
|
|
|
|
|
|
if (concrete->name == "in")
|
|
|
|
concrete->name = "globalIn";
|
|
|
|
else if (concrete->name == "notIn")
|
|
|
|
concrete->name = "globalNotIn";
|
|
|
|
else if (concrete->name == "globalIn" || concrete->name == "globalNotIn")
|
|
|
|
{
|
|
|
|
/// Already processed.
|
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception("Logical error: unexpected function name " + concrete->name, ErrorCodes::LOGICAL_ERROR);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-04-11 19:29:28 +00:00
|
|
|
else if (table_join)
|
|
|
|
table_join->locality = ASTTableJoin::Locality::Global;
|
|
|
|
else
|
|
|
|
throw Exception("Logical error: unexpected AST node", ErrorCodes::LOGICAL_ERROR);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-04-11 19:29:28 +00:00
|
|
|
else if (distributed_product_mode == DistributedProductMode::LOCAL)
|
|
|
|
{
|
|
|
|
/// Convert distributed table to corresponding remote table.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
std::string database;
|
|
|
|
std::string table;
|
|
|
|
std::tie(database, table) = checker.getRemoteDatabaseAndTableName(*storage);
|
|
|
|
|
|
|
|
String alias = database_and_table->tryGetAlias();
|
|
|
|
if (alias.empty())
|
2020-03-27 20:12:14 +00:00
|
|
|
throw Exception("Distributed table should have an alias when distributed_product_mode set to local",
|
2019-04-11 19:29:28 +00:00
|
|
|
ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED);
|
|
|
|
|
2020-03-26 09:07:10 +00:00
|
|
|
auto & identifier = database_and_table->as<ASTIdentifier &>();
|
2020-03-27 20:12:14 +00:00
|
|
|
renamed_tables.emplace_back(identifier.clone());
|
2020-03-26 09:07:10 +00:00
|
|
|
identifier.resetTable(database, table);
|
2019-04-11 19:29:28 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting",
|
|
|
|
ErrorCodes::LOGICAL_ERROR);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-04-11 19:29:28 +00:00
|
|
|
};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
using NonGlobalTableMatcher = OneTypeMatcher<NonGlobalTableData>;
|
|
|
|
using NonGlobalTableVisitor = InDepthNodeVisitor<NonGlobalTableMatcher, true>;
|
2017-01-04 02:37:47 +00:00
|
|
|
|
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
class NonGlobalSubqueryMatcher
|
2017-01-04 02:37:47 +00:00
|
|
|
{
|
2019-04-11 19:29:28 +00:00
|
|
|
public:
|
|
|
|
struct Data
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-04-11 19:29:28 +00:00
|
|
|
const CheckShardsAndTables & checker;
|
|
|
|
const Context & context;
|
2020-03-27 20:12:14 +00:00
|
|
|
std::vector<std::pair<ASTPtr, std::vector<ASTPtr>>> & renamed_tables;
|
2019-04-11 19:29:28 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static void visit(ASTPtr & node, Data & data)
|
|
|
|
{
|
|
|
|
if (auto * function = node->as<ASTFunction>())
|
|
|
|
visit(*function, node, data);
|
|
|
|
if (const auto * tables = node->as<ASTTablesInSelectQueryElement>())
|
|
|
|
visit(*tables, node, data);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
static bool needChildVisit(ASTPtr & node, const ASTPtr & child)
|
|
|
|
{
|
|
|
|
if (auto * function = node->as<ASTFunction>())
|
|
|
|
if (function->name == "in" || function->name == "notIn")
|
|
|
|
return false; /// Processed, process others
|
|
|
|
|
|
|
|
if (const auto * t = node->as<ASTTablesInSelectQueryElement>())
|
|
|
|
if (t->table_join && t->table_expression)
|
|
|
|
return false; /// Processed, process others
|
|
|
|
|
|
|
|
/// Descent into all children, but not into subqueries of other kind (scalar subqueries), that are irrelevant to us.
|
2020-03-09 00:08:02 +00:00
|
|
|
return !child->as<ASTSelectQuery>();
|
2019-04-11 19:29:28 +00:00
|
|
|
}
|
2017-01-04 02:37:47 +00:00
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
private:
|
|
|
|
static void visit(ASTFunction & node, ASTPtr &, Data & data)
|
|
|
|
{
|
|
|
|
if (node.name == "in" || node.name == "notIn")
|
|
|
|
{
|
2020-07-22 15:37:59 +00:00
|
|
|
if (node.arguments->children.size() != 2)
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
|
|
"Function '{}' expects two arguments, given: '{}'",
|
|
|
|
node.name, node.formatForErrorMessage());
|
|
|
|
}
|
2019-04-11 19:29:28 +00:00
|
|
|
auto & subquery = node.arguments->children.at(1);
|
2020-03-27 20:12:14 +00:00
|
|
|
std::vector<ASTPtr> renamed;
|
|
|
|
NonGlobalTableVisitor::Data table_data{data.checker, data.context, renamed, &node, nullptr};
|
2019-04-11 19:29:28 +00:00
|
|
|
NonGlobalTableVisitor(table_data).visit(subquery);
|
2020-03-27 20:12:14 +00:00
|
|
|
if (!renamed.empty())
|
|
|
|
data.renamed_tables.emplace_back(subquery, std::move(renamed));
|
2019-04-11 19:29:28 +00:00
|
|
|
}
|
|
|
|
}
|
2017-01-04 02:37:47 +00:00
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
static void visit(const ASTTablesInSelectQueryElement & node, ASTPtr &, Data & data)
|
|
|
|
{
|
|
|
|
if (!node.table_join || !node.table_expression)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ASTTableJoin * table_join = node.table_join->as<ASTTableJoin>();
|
|
|
|
if (table_join->locality != ASTTableJoin::Locality::Global)
|
|
|
|
{
|
|
|
|
if (auto & subquery = node.table_expression->as<ASTTableExpression>()->subquery)
|
|
|
|
{
|
2020-03-27 20:12:14 +00:00
|
|
|
std::vector<ASTPtr> renamed;
|
|
|
|
NonGlobalTableVisitor::Data table_data{data.checker, data.context, renamed, nullptr, table_join};
|
2019-04-11 19:29:28 +00:00
|
|
|
NonGlobalTableVisitor(table_data).visit(subquery);
|
2020-03-27 20:12:14 +00:00
|
|
|
if (!renamed.empty())
|
|
|
|
data.renamed_tables.emplace_back(subquery, std::move(renamed));
|
2019-04-11 19:29:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
using NonGlobalSubqueryVisitor = InDepthNodeVisitor<NonGlobalSubqueryMatcher, true>;
|
2017-01-04 02:37:47 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
/// Entry point of the preprocessor.
/// Does nothing unless `ast` is a SELECT query with tables, `distributed_product_mode` is not ALLOW,
/// and the first table of the query is an ordinary table for which `checker` reports at least two shards.
/// Otherwise runs NonGlobalSubqueryVisitor over the whole query.
void InJoinSubqueriesPreprocessor::visit(ASTPtr & ast) const
{
    if (!ast)
        return;

    ASTSelectQuery * select = ast->as<ASTSelectQuery>();
    if (!(select && select->tables()))
        return;

    if (context.getSettingsRef().distributed_product_mode == DistributedProductMode::ALLOW)
        return;

    const auto & select_tables = select->tables()->as<ASTTablesInSelectQuery &>();
    if (select_tables.children.empty())
        return;

    /// Only the first element of the table list is examined here.
    const auto & first_element = select_tables.children.front()->as<ASTTablesInSelectQueryElement &>();
    if (!first_element.table_expression)
        return;

    const auto * first_table = first_element.table_expression->as<ASTTableExpression>();

    /// If not ordinary table, skip it.
    const ASTPtr & table_name = first_table->database_and_table_name;
    if (!table_name)
        return;

    /// If not really distributed table, skip it.
    if (StoragePtr storage = tryGetTable(table_name, context); !storage || !checker->hasAtLeastTwoShards(*storage))
        return;

    NonGlobalSubqueryVisitor::Data visitor_data{*checker, context, renamed_tables};
    NonGlobalSubqueryVisitor(visitor_data).visit(ast);
}
|
|
|
|
|
|
|
|
|
2019-04-11 19:29:28 +00:00
|
|
|
bool InJoinSubqueriesPreprocessor::CheckShardsAndTables::hasAtLeastTwoShards(const IStorage & table) const
|
2017-01-04 02:37:47 +00:00
|
|
|
{
|
2017-11-04 16:46:14 +00:00
|
|
|
const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&table);
|
2017-04-01 07:20:54 +00:00
|
|
|
if (!distributed)
|
|
|
|
return false;
|
2017-01-04 02:37:47 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return distributed->getShardCount() >= 2;
|
2017-01-04 02:37:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::pair<std::string, std::string>
|
2019-04-11 19:29:28 +00:00
|
|
|
InJoinSubqueriesPreprocessor::CheckShardsAndTables::getRemoteDatabaseAndTableName(const IStorage & table) const
|
2017-01-04 02:37:47 +00:00
|
|
|
{
|
2017-11-04 16:46:14 +00:00
|
|
|
const StorageDistributed & distributed = dynamic_cast<const StorageDistributed &>(table);
|
2017-04-01 07:20:54 +00:00
|
|
|
return { distributed.getRemoteDatabaseName(), distributed.getRemoteTableName() };
|
2017-01-04 02:37:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|