removeJoin: remove joined columns

This commit is contained in:
Azat Khuzhin 2021-05-30 17:43:54 +03:00
parent 25f3efde2b
commit 578ecc1645
3 changed files with 57 additions and 57 deletions

View File

@ -1,5 +1,7 @@
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Storages/IStorage.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -23,16 +25,58 @@ bool hasJoin(const ASTSelectQuery & select)
}
/// Rewrite original query removing joined tables from it.
///
/// When `select` has no JOIN (per hasJoin()) nothing is modified and false is returned.
/// Otherwise the query is reduced in place and true is returned:
///  - only the first child of the tables list is kept;
///  - GROUP BY, HAVING and ORDER BY are reset and `rewriter_result.aggregates` is cleared;
///  - the SELECT list is replaced by identifiers built from `rewriter_result.required_source_columns`;
///  - WHERE and PREWHERE keep only the conjuncts attributed to table position 0.
///
/// NOTE(review): this listing looks like a diff rendered without its +/- markers, so superseded
/// and replacement lines sit side by side (e.g. the two signatures and the two hasJoin() checks
/// below) - confirm against the real file before relying on the exact statement order.
bool removeJoin(ASTSelectQuery & select)
bool removeJoin(ASTSelectQuery & select, TreeRewriterResult & rewriter_result, ContextPtr context)
{
if (hasJoin(select))
if (!hasJoin(select))
return false;
/// Keep only the first child of the tables list.
select.tables()->children.resize(1);
/// Also remove GROUP BY because ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed.
select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
rewriter_result.aggregates.clear();
/// Replace select list to remove joined columns
auto select_list = std::make_shared<ASTExpressionList>();
for (const auto & column : rewriter_result.required_source_columns)
select_list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
select.setExpression(ASTSelectQuery::Expression::SELECT, select_list);
/// Built after the tables list and SELECT list were rewritten above.
const DB::IdentifierMembershipCollector membership_collector{select, context};
/// Remove unknown identifiers from where, leave only ones from left table
auto replace_where = [&membership_collector](ASTSelectQuery & query, ASTSelectQuery::Expression expr)
{
/// The simplest temporary solution: leave only the first table in query.
/// TODO: we also need to remove joined columns and related functions (taking into account aliases if any).
select.tables()->children.resize(1);
return true;
}
return false;
auto where = query.getExpression(expr, false);
/// Nothing to filter when the query has no such clause.
if (!where)
return;
const size_t left_table_pos = 0;
/// Test each argument of `and` function and select ones related to only left table
std::shared_ptr<ASTFunction> new_conj = makeASTFunction("and");
for (const auto & node : collectConjunctions(where))
{
/// NOTE(review): `node` is bound as `const auto &`, so std::move() yields a const rvalue
/// and push_back most likely copies instead of moving - drop the std::move or the const.
if (membership_collector.getIdentsMembership(node) == left_table_pos)
new_conj->arguments->children.push_back(std::move(node));
}
if (new_conj->arguments->children.empty())
/// No identifiers from left table
query.setExpression(expr, {});
else if (new_conj->arguments->children.size() == 1)
/// Only one expression, lift from `and`
query.setExpression(expr, std::move(new_conj->arguments->children[0]));
else
/// Set new expression
query.setExpression(expr, std::move(new_conj));
};
replace_where(select, ASTSelectQuery::Expression::WHERE);
replace_where(select, ASTSelectQuery::Expression::PREWHERE);
/// HAVING and ORDER BY are dropped unconditionally rather than filtered.
select.setExpression(ASTSelectQuery::Expression::HAVING, {});
select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {});
return true;
}
Block getHeaderForProcessingStage(
@ -72,7 +116,8 @@ Block getHeaderForProcessingStage(
case QueryProcessingStage::MAX:
{
auto query = query_info.query->clone();
removeJoin(*query->as<ASTSelectQuery>());
TreeRewriterResult new_rewriter_result = *query_info.syntax_analyzer_result;
removeJoin(*query->as<ASTSelectQuery>(), new_rewriter_result, context);
auto stream = std::make_shared<OneBlockInputStream>(
metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()));

View File

@ -13,10 +13,11 @@ class IStorage;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
struct SelectQueryInfo;
struct TreeRewriterResult;
class ASTSelectQuery;
/// Reports whether `select` contains a JOIN (definition is not part of this header).
bool hasJoin(const ASTSelectQuery & select);
/// Removes joined tables from `select` in place; returns true when the query was rewritten
/// and false when it had no JOIN.
/// NOTE(review): two signatures appear here, presumably the superseded one followed by its
/// replacement in a diff shown without +/- markers - confirm which one the header really declares.
/// NOTE(review): `ContextPtr` has no visible declaration or include in this excerpt - verify
/// it is in scope before this point.
bool removeJoin(ASTSelectQuery & select);
bool removeJoin(ASTSelectQuery & select, TreeRewriterResult & rewriter_result, ContextPtr context);
Block getHeaderForProcessingStage(
const IStorage & storage,

View File

@ -46,54 +46,8 @@ namespace
/// Copies `rewriter_result`, strips the JOIN from `select` in place, and returns the copy.
/// The copy is what gets mutated (its `aggregates` are cleared when a JOIN was removed);
/// the caller's `rewriter_result` is taken by const reference and left untouched.
///
/// NOTE(review): this listing looks like a diff rendered without its +/- markers: the inline
/// `if (removeJoin(select))` body and the single `removeJoin(select, new_rewriter_result, context)`
/// call near the end are presumably the before and after of the same change - confirm against the real file.
TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result, ContextPtr context)
{
TreeRewriterResult new_rewriter_result = rewriter_result;
if (removeJoin(select))
{
/// Also remove GROUP BY because ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed.
select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
new_rewriter_result.aggregates.clear();
/// Replace select list to remove joined columns
auto select_list = std::make_shared<ASTExpressionList>();
for (const auto & column : rewriter_result.required_source_columns)
select_list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
select.setExpression(ASTSelectQuery::Expression::SELECT, select_list);
const DB::IdentifierMembershipCollector membership_collector{select, context};
/// Remove unknown identifiers from where, leave only ones from left table
auto replace_where = [&membership_collector](ASTSelectQuery & query, ASTSelectQuery::Expression expr)
{
auto where = query.getExpression(expr, false);
/// Nothing to filter when the query has no such clause.
if (!where)
return;
const size_t left_table_pos = 0;
/// Test each argument of `and` function and select ones related to only left table
std::shared_ptr<ASTFunction> new_conj = makeASTFunction("and");
for (const auto & node : collectConjunctions(where))
{
/// NOTE(review): `node` is bound as `const auto &`, so std::move() yields a const rvalue
/// and push_back most likely copies instead of moving.
if (membership_collector.getIdentsMembership(node) == left_table_pos)
new_conj->arguments->children.push_back(std::move(node));
}
if (new_conj->arguments->children.empty())
/// No identifiers from left table
query.setExpression(expr, {});
else if (new_conj->arguments->children.size() == 1)
/// Only one expression, lift from `and`
query.setExpression(expr, std::move(new_conj->arguments->children[0]));
else
/// Set new expression
query.setExpression(expr, std::move(new_conj));
};
replace_where(select,ASTSelectQuery::Expression::WHERE);
replace_where(select,ASTSelectQuery::Expression::PREWHERE);
/// HAVING and ORDER BY are dropped unconditionally rather than filtered.
select.setExpression(ASTSelectQuery::Expression::HAVING, {});
select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {});
}
/// The boolean result is ignored here; the copy is returned whether or not a JOIN was removed.
removeJoin(select, new_rewriter_result, context);
return new_rewriter_result;
}