mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
fix dups in GLOBAL JOIN with asterisks
This commit is contained in:
parent
bd559f8db8
commit
7561ff2ab7
@ -200,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
|||||||
if (storage)
|
if (storage)
|
||||||
table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
|
table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
|
||||||
|
|
||||||
syntax_analyzer_result = SyntaxAnalyzer(context, options.subquery_depth).analyze(
|
syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
|
||||||
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
|
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
|
||||||
query_analyzer = std::make_unique<ExpressionAnalyzer>(
|
query_analyzer = std::make_unique<ExpressionAnalyzer>(
|
||||||
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),
|
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),
|
||||||
|
@ -26,12 +26,14 @@ struct SelectQueryOptions
|
|||||||
size_t subquery_depth;
|
size_t subquery_depth;
|
||||||
bool only_analyze;
|
bool only_analyze;
|
||||||
bool modify_inplace;
|
bool modify_inplace;
|
||||||
|
bool remove_duplicates;
|
||||||
|
|
||||||
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
|
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
|
||||||
: to_stage(stage)
|
: to_stage(stage)
|
||||||
, subquery_depth(depth)
|
, subquery_depth(depth)
|
||||||
, only_analyze(false)
|
, only_analyze(false)
|
||||||
, modify_inplace(false)
|
, modify_inplace(false)
|
||||||
|
, remove_duplicates(false)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
SelectQueryOptions copy() const { return *this; }
|
SelectQueryOptions copy() const { return *this; }
|
||||||
@ -58,6 +60,12 @@ struct SelectQueryOptions
|
|||||||
|
|
||||||
SelectQueryOptions & noModify() { return modify(false); }
|
SelectQueryOptions & noModify() { return modify(false); }
|
||||||
|
|
||||||
|
SelectQueryOptions & removeDuplicates(bool value = true)
|
||||||
|
{
|
||||||
|
remove_duplicates = value;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
SelectQueryOptions & noSubquery()
|
SelectQueryOptions & noSubquery()
|
||||||
{
|
{
|
||||||
subquery_depth = 0;
|
subquery_depth = 0;
|
||||||
|
@ -125,22 +125,36 @@ bool hasArrayJoin(const ASTPtr & ast)
|
|||||||
|
|
||||||
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
|
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
|
||||||
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
||||||
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
|
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
|
||||||
|
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
|
||||||
{
|
{
|
||||||
if (required_result_columns.empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
ASTs & elements = select_query->select_expression_list->children;
|
ASTs & elements = select_query->select_expression_list->children;
|
||||||
|
|
||||||
|
std::map<String, size_t> required_columns_with_duplicate_count;
|
||||||
|
|
||||||
|
if (!required_result_columns.empty())
|
||||||
|
{
|
||||||
|
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
||||||
|
for (const auto & name : required_result_columns)
|
||||||
|
{
|
||||||
|
if (remove_dups)
|
||||||
|
required_columns_with_duplicate_count[name] = 1;
|
||||||
|
else
|
||||||
|
++required_columns_with_duplicate_count[name];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (remove_dups)
|
||||||
|
{
|
||||||
|
/// Even if we have no requirements there could be duplicates because of asterisks, e.g. SELECT *, t.*
|
||||||
|
for (const auto & elem : elements)
|
||||||
|
required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
ASTs new_elements;
|
ASTs new_elements;
|
||||||
new_elements.reserve(elements.size());
|
new_elements.reserve(elements.size());
|
||||||
|
|
||||||
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
|
||||||
/// In that case we keep them exactly same number of times.
|
|
||||||
std::map<String, size_t> required_columns_with_duplicate_count;
|
|
||||||
for (const auto & name : required_result_columns)
|
|
||||||
++required_columns_with_duplicate_count[name];
|
|
||||||
|
|
||||||
for (const auto & elem : elements)
|
for (const auto & elem : elements)
|
||||||
{
|
{
|
||||||
String name = elem->getAliasOrColumnName();
|
String name = elem->getAliasOrColumnName();
|
||||||
@ -688,7 +702,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
|||||||
/// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
|
/// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
|
||||||
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
||||||
if (select_query)
|
if (select_query)
|
||||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
|
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
|
||||||
|
|
||||||
/// Executing scalar subqueries - replacing them with constant values.
|
/// Executing scalar subqueries - replacing them with constant values.
|
||||||
executeScalarSubqueries(query, context, subquery_depth);
|
executeScalarSubqueries(query, context, subquery_depth);
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <Interpreters/AnalyzedJoin.h>
|
#include <Interpreters/AnalyzedJoin.h>
|
||||||
#include <Interpreters/Aliases.h>
|
#include <Interpreters/Aliases.h>
|
||||||
|
#include <Interpreters/SelectQueryOptions.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
|
|||||||
class SyntaxAnalyzer
|
class SyntaxAnalyzer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0)
|
SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {})
|
||||||
: context(context_)
|
: context(context_)
|
||||||
, subquery_depth(subquery_depth_)
|
, subquery_depth(select_options.subquery_depth)
|
||||||
|
, remove_duplicates(select_options.remove_duplicates)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
SyntaxAnalyzerResultPtr analyze(
|
SyntaxAnalyzerResultPtr analyze(
|
||||||
@ -69,6 +71,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
const Context & context;
|
const Context & context;
|
||||||
size_t subquery_depth;
|
size_t subquery_depth;
|
||||||
|
bool remove_duplicates;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,8 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
|
|||||||
subquery_settings.extremes = 0;
|
subquery_settings.extremes = 0;
|
||||||
subquery_context.setSettings(subquery_settings);
|
subquery_context.setSettings(subquery_settings);
|
||||||
|
|
||||||
|
auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery();
|
||||||
|
|
||||||
ASTPtr query;
|
ASTPtr query;
|
||||||
if (table || function)
|
if (table || function)
|
||||||
{
|
{
|
||||||
@ -83,48 +85,10 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
query = subquery->children.at(0);
|
query = subquery->children.at(0);
|
||||||
|
subquery_options.removeDuplicates();
|
||||||
/** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t
|
|
||||||
* This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns.
|
|
||||||
* Saving to the table is required for GLOBAL subqueries.
|
|
||||||
*
|
|
||||||
* To avoid this situation, we will rename the same columns.
|
|
||||||
*/
|
|
||||||
|
|
||||||
std::set<std::string> all_column_names;
|
|
||||||
std::set<std::string> assigned_column_names;
|
|
||||||
|
|
||||||
if (const auto * select_with_union = query->as<ASTSelectWithUnionQuery>())
|
|
||||||
{
|
|
||||||
if (const auto * select = select_with_union->list_of_selects->children.at(0)->as<ASTSelectQuery>())
|
|
||||||
{
|
|
||||||
for (auto & expr : select->select_expression_list->children)
|
|
||||||
all_column_names.insert(expr->getAliasOrColumnName());
|
|
||||||
|
|
||||||
for (auto & expr : select->select_expression_list->children)
|
|
||||||
{
|
|
||||||
auto name = expr->getAliasOrColumnName();
|
|
||||||
|
|
||||||
if (!assigned_column_names.insert(name).second)
|
|
||||||
{
|
|
||||||
size_t i = 1;
|
|
||||||
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
|
|
||||||
++i;
|
|
||||||
|
|
||||||
name = name + "_" + toString(i);
|
|
||||||
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
|
|
||||||
expr->setAlias(name);
|
|
||||||
|
|
||||||
all_column_names.insert(name);
|
|
||||||
assigned_column_names.insert(name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_shared<InterpreterSelectWithUnionQuery>(
|
return std::make_shared<InterpreterSelectWithUnionQuery>(query, subquery_context, subquery_options, required_source_columns);
|
||||||
query, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,2 +1,7 @@
|
|||||||
1
|
1
|
||||||
0
|
0
|
||||||
|
0
|
||||||
|
0 0
|
||||||
|
0
|
||||||
|
0 0
|
||||||
|
0 0
|
||||||
|
@ -35,49 +35,49 @@ GLOBAL INNER JOIN
|
|||||||
) USING dummy;
|
) USING dummy;
|
||||||
|
|
||||||
|
|
||||||
-- SET asterisk_left_columns_only = 0;
|
SET asterisk_left_columns_only = 0;
|
||||||
--
|
|
||||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
SELECT * FROM remote('127.0.0.2', system.one)
|
||||||
-- GLOBAL INNER JOIN
|
GLOBAL INNER JOIN
|
||||||
-- (
|
(
|
||||||
-- SELECT *, dummy
|
SELECT *, dummy
|
||||||
-- FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
|
FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
|
||||||
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||||
-- USING dummy
|
USING dummy
|
||||||
-- ) USING dummy;
|
) USING dummy;
|
||||||
--
|
|
||||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
SELECT * FROM remote('127.0.0.2', system.one)
|
||||||
-- GLOBAL INNER JOIN
|
GLOBAL INNER JOIN
|
||||||
-- (
|
(
|
||||||
-- SELECT *, t1.*, t2.*
|
SELECT *, t1.*, t2.*
|
||||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||||
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||||
-- USING dummy
|
USING dummy
|
||||||
-- ) USING dummy;
|
) USING dummy;
|
||||||
--
|
|
||||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
SELECT * FROM remote('127.0.0.2', system.one)
|
||||||
-- GLOBAL INNER JOIN
|
GLOBAL INNER JOIN
|
||||||
-- (
|
(
|
||||||
-- SELECT *, dummy
|
SELECT *, dummy
|
||||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||||
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||||
-- USING dummy
|
USING dummy
|
||||||
-- ) USING dummy;
|
) USING dummy;
|
||||||
--
|
|
||||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
SELECT * FROM remote('127.0.0.2', system.one)
|
||||||
-- GLOBAL INNER JOIN
|
GLOBAL INNER JOIN
|
||||||
-- (
|
(
|
||||||
-- SELECT *
|
SELECT *, dummy as other
|
||||||
-- FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
|
FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
|
||||||
-- GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||||
-- USING dummy
|
USING dummy
|
||||||
-- ) USING dummy;
|
) USING dummy;
|
||||||
--
|
|
||||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
SELECT * FROM remote('127.0.0.2', system.one)
|
||||||
-- GLOBAL INNER JOIN
|
GLOBAL INNER JOIN
|
||||||
-- (
|
(
|
||||||
-- SELECT *
|
SELECT *, dummy, dummy as other
|
||||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||||
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||||
-- USING dummy
|
USING dummy
|
||||||
-- ) USING dummy;
|
) USING dummy;
|
||||||
|
Loading…
Reference in New Issue
Block a user