fix dups in GLOBAL JOIN with asterisks

This commit is contained in:
chertus 2019-03-18 17:56:33 +03:00
parent bd559f8db8
commit 7561ff2ab7
7 changed files with 94 additions and 100 deletions

View File

@ -200,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (storage) if (storage)
table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId()); table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
syntax_analyzer_result = SyntaxAnalyzer(context, options.subquery_depth).analyze( syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
query_analyzer = std::make_unique<ExpressionAnalyzer>( query_analyzer = std::make_unique<ExpressionAnalyzer>(
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),

View File

@ -26,12 +26,14 @@ struct SelectQueryOptions
size_t subquery_depth; size_t subquery_depth;
bool only_analyze; bool only_analyze;
bool modify_inplace; bool modify_inplace;
bool remove_duplicates;
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
: to_stage(stage) : to_stage(stage)
, subquery_depth(depth) , subquery_depth(depth)
, only_analyze(false) , only_analyze(false)
, modify_inplace(false) , modify_inplace(false)
, remove_duplicates(false)
{} {}
SelectQueryOptions copy() const { return *this; } SelectQueryOptions copy() const { return *this; }
@ -58,6 +60,12 @@ struct SelectQueryOptions
SelectQueryOptions & noModify() { return modify(false); } SelectQueryOptions & noModify() { return modify(false); }
/// Sets the remove_duplicates flag (enabled when called with no argument)
/// and returns *this so the call can be chained with the other option setters.
SelectQueryOptions & removeDuplicates(bool value = true)
{
remove_duplicates = value;
return *this;
}
SelectQueryOptions & noSubquery() SelectQueryOptions & noSubquery()
{ {
subquery_depth = 0; subquery_depth = 0;

View File

@ -125,22 +125,36 @@ bool hasArrayJoin(const ASTPtr & ast)
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. /// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) /// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables, so duplicate column names are impossible.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
{ {
if (required_result_columns.empty())
return;
ASTs & elements = select_query->select_expression_list->children; ASTs & elements = select_query->select_expression_list->children;
std::map<String, size_t> required_columns_with_duplicate_count;
if (!required_result_columns.empty())
{
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
for (const auto & name : required_result_columns)
{
if (remove_dups)
required_columns_with_duplicate_count[name] = 1;
else
++required_columns_with_duplicate_count[name];
}
}
else if (remove_dups)
{
/// Even if we have no requirements there could be duplicates because of asterisks, e.g. SELECT *, t.*
for (const auto & elem : elements)
required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
}
else
return;
ASTs new_elements; ASTs new_elements;
new_elements.reserve(elements.size()); new_elements.reserve(elements.size());
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
/// In that case we keep them exactly same number of times.
std::map<String, size_t> required_columns_with_duplicate_count;
for (const auto & name : required_result_columns)
++required_columns_with_duplicate_count[name];
for (const auto & elem : elements) for (const auto & elem : elements)
{ {
String name = elem->getAliasOrColumnName(); String name = elem->getAliasOrColumnName();
@ -688,7 +702,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
if (select_query) if (select_query)
removeUnneededColumnsFromSelectClause(select_query, required_result_columns); removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
/// Executing scalar subqueries - replacing them with constant values. /// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, context, subquery_depth); executeScalarSubqueries(query, context, subquery_depth);

View File

@ -2,6 +2,7 @@
#include <Interpreters/AnalyzedJoin.h> #include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/Aliases.h> #include <Interpreters/Aliases.h>
#include <Interpreters/SelectQueryOptions.h>
namespace DB namespace DB
{ {
@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
class SyntaxAnalyzer class SyntaxAnalyzer
{ {
public: public:
SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0) SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {})
: context(context_) : context(context_)
, subquery_depth(subquery_depth_) , subquery_depth(select_options.subquery_depth)
, remove_duplicates(select_options.remove_duplicates)
{} {}
SyntaxAnalyzerResultPtr analyze( SyntaxAnalyzerResultPtr analyze(
@ -69,6 +71,7 @@ public:
private: private:
const Context & context; const Context & context;
size_t subquery_depth; size_t subquery_depth;
bool remove_duplicates;
}; };
} }

View File

@ -41,6 +41,8 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
subquery_settings.extremes = 0; subquery_settings.extremes = 0;
subquery_context.setSettings(subquery_settings); subquery_context.setSettings(subquery_settings);
auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery();
ASTPtr query; ASTPtr query;
if (table || function) if (table || function)
{ {
@ -83,48 +85,10 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
else else
{ {
query = subquery->children.at(0); query = subquery->children.at(0);
subquery_options.removeDuplicates();
/** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t
* This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns.
* Saving to the table is required for GLOBAL subqueries.
*
* To avoid this situation, we will rename the same columns.
*/
std::set<std::string> all_column_names;
std::set<std::string> assigned_column_names;
if (const auto * select_with_union = query->as<ASTSelectWithUnionQuery>())
{
if (const auto * select = select_with_union->list_of_selects->children.at(0)->as<ASTSelectQuery>())
{
for (auto & expr : select->select_expression_list->children)
all_column_names.insert(expr->getAliasOrColumnName());
for (auto & expr : select->select_expression_list->children)
{
auto name = expr->getAliasOrColumnName();
if (!assigned_column_names.insert(name).second)
{
size_t i = 1;
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
++i;
name = name + "_" + toString(i);
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
expr->setAlias(name);
all_column_names.insert(name);
assigned_column_names.insert(name);
}
}
}
}
} }
return std::make_shared<InterpreterSelectWithUnionQuery>( return std::make_shared<InterpreterSelectWithUnionQuery>(query, subquery_context, subquery_options, required_source_columns);
query, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns);
} }
} }

View File

@ -1,2 +1,7 @@
1 1
0 0
0
0 0
0
0 0
0 0

View File

@ -35,49 +35,49 @@ GLOBAL INNER JOIN
) USING dummy; ) USING dummy;
-- SET asterisk_left_columns_only = 0; SET asterisk_left_columns_only = 0;
--
-- SELECT * FROM remote('127.0.0.2', system.one) SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN GLOBAL INNER JOIN
-- ( (
-- SELECT *, dummy SELECT *, dummy
-- FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1 FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
-- USING dummy USING dummy
-- ) USING dummy; ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one) SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN GLOBAL INNER JOIN
-- ( (
-- SELECT *, t1.*, t2.* SELECT *, t1.*, t2.*
-- FROM ( SELECT toUInt8(1) AS dummy ) t1 FROM ( SELECT toUInt8(0) AS dummy ) t1
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
-- USING dummy USING dummy
-- ) USING dummy; ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one) SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN GLOBAL INNER JOIN
-- ( (
-- SELECT *, dummy SELECT *, dummy
-- FROM ( SELECT toUInt8(1) AS dummy ) t1 FROM ( SELECT toUInt8(0) AS dummy ) t1
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
-- USING dummy USING dummy
-- ) USING dummy; ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one) SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN GLOBAL INNER JOIN
-- ( (
-- SELECT * SELECT *, dummy as other
-- FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1 FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
-- GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
-- USING dummy USING dummy
-- ) USING dummy; ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one) SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN GLOBAL INNER JOIN
-- ( (
-- SELECT * SELECT *, dummy, dummy as other
-- FROM ( SELECT toUInt8(1) AS dummy ) t1 FROM ( SELECT toUInt8(0) AS dummy ) t1
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
-- USING dummy USING dummy
-- ) USING dummy; ) USING dummy;