mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
fix dups in GLOBAL JOIN with asterisks
This commit is contained in:
parent
bd559f8db8
commit
7561ff2ab7
@ -200,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
if (storage)
|
||||
table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
|
||||
|
||||
syntax_analyzer_result = SyntaxAnalyzer(context, options.subquery_depth).analyze(
|
||||
syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
|
||||
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
|
||||
query_analyzer = std::make_unique<ExpressionAnalyzer>(
|
||||
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),
|
||||
|
@ -26,12 +26,14 @@ struct SelectQueryOptions
|
||||
size_t subquery_depth;
|
||||
bool only_analyze;
|
||||
bool modify_inplace;
|
||||
bool remove_duplicates;
|
||||
|
||||
/// Builds options for the given processing stage and subquery depth.
/// Both parameters are optional: the stage defaults to QueryProcessingStage::Complete and the depth to 0.
/// All boolean flags (only_analyze, modify_inplace, remove_duplicates) start out as false.
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
|
||||
: to_stage(stage)
|
||||
, subquery_depth(depth)
|
||||
, only_analyze(false)
|
||||
, modify_inplace(false)
|
||||
, remove_duplicates(false)
|
||||
{}
|
||||
|
||||
/// Returns a by-value copy of these options; the current object is left unchanged.
SelectQueryOptions copy() const { return *this; }
|
||||
@ -58,6 +60,12 @@ struct SelectQueryOptions
|
||||
|
||||
/// Shorthand for modify(false); returns whatever modify() returns.
SelectQueryOptions & noModify() { return modify(false); }
|
||||
|
||||
/// Sets the remove_duplicates flag (to true when called without an argument).
/// Returns *this so that the call can be chained with other option setters.
SelectQueryOptions & removeDuplicates(bool value = true)
|
||||
{
|
||||
remove_duplicates = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & noSubquery()
|
||||
{
|
||||
subquery_depth = 0;
|
||||
|
@ -125,22 +125,36 @@ bool hasArrayJoin(const ASTPtr & ast)
|
||||
|
||||
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
|
||||
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
||||
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
|
||||
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables, so duplicate column names are impossible.
|
||||
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
|
||||
{
|
||||
if (required_result_columns.empty())
|
||||
return;
|
||||
|
||||
ASTs & elements = select_query->select_expression_list->children;
|
||||
|
||||
std::map<String, size_t> required_columns_with_duplicate_count;
|
||||
|
||||
if (!required_result_columns.empty())
|
||||
{
|
||||
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
||||
for (const auto & name : required_result_columns)
|
||||
{
|
||||
if (remove_dups)
|
||||
required_columns_with_duplicate_count[name] = 1;
|
||||
else
|
||||
++required_columns_with_duplicate_count[name];
|
||||
}
|
||||
}
|
||||
else if (remove_dups)
|
||||
{
|
||||
/// Even if we have no requirements there could be duplicates because of asterisks, e.g. SELECT *, t.*
|
||||
for (const auto & elem : elements)
|
||||
required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
|
||||
}
|
||||
else
|
||||
return;
|
||||
|
||||
ASTs new_elements;
|
||||
new_elements.reserve(elements.size());
|
||||
|
||||
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
||||
/// In that case we keep them exactly same number of times.
|
||||
std::map<String, size_t> required_columns_with_duplicate_count;
|
||||
for (const auto & name : required_result_columns)
|
||||
++required_columns_with_duplicate_count[name];
|
||||
|
||||
for (const auto & elem : elements)
|
||||
{
|
||||
String name = elem->getAliasOrColumnName();
|
||||
@ -688,7 +702,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
|
||||
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
||||
if (select_query)
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
|
||||
|
||||
/// Executing scalar subqueries - replacing them with constant values.
|
||||
executeScalarSubqueries(query, context, subquery_depth);
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Interpreters/AnalyzedJoin.h>
|
||||
#include <Interpreters/Aliases.h>
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
|
||||
class SyntaxAnalyzer
|
||||
{
|
||||
public:
|
||||
SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0)
|
||||
SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {})
|
||||
: context(context_)
|
||||
, subquery_depth(subquery_depth_)
|
||||
, subquery_depth(select_options.subquery_depth)
|
||||
, remove_duplicates(select_options.remove_duplicates)
|
||||
{}
|
||||
|
||||
SyntaxAnalyzerResultPtr analyze(
|
||||
@ -69,6 +71,7 @@ public:
|
||||
private:
|
||||
const Context & context;
|
||||
size_t subquery_depth;
|
||||
bool remove_duplicates;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -41,6 +41,8 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
|
||||
subquery_settings.extremes = 0;
|
||||
subquery_context.setSettings(subquery_settings);
|
||||
|
||||
auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery();
|
||||
|
||||
ASTPtr query;
|
||||
if (table || function)
|
||||
{
|
||||
@ -83,48 +85,10 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
|
||||
else
|
||||
{
|
||||
query = subquery->children.at(0);
|
||||
|
||||
/** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t
|
||||
* This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns.
|
||||
* Saving to the table is required for GLOBAL subqueries.
|
||||
*
|
||||
* To avoid this situation, we will rename the same columns.
|
||||
*/
|
||||
|
||||
std::set<std::string> all_column_names;
|
||||
std::set<std::string> assigned_column_names;
|
||||
|
||||
if (const auto * select_with_union = query->as<ASTSelectWithUnionQuery>())
|
||||
{
|
||||
if (const auto * select = select_with_union->list_of_selects->children.at(0)->as<ASTSelectQuery>())
|
||||
{
|
||||
for (auto & expr : select->select_expression_list->children)
|
||||
all_column_names.insert(expr->getAliasOrColumnName());
|
||||
|
||||
for (auto & expr : select->select_expression_list->children)
|
||||
{
|
||||
auto name = expr->getAliasOrColumnName();
|
||||
|
||||
if (!assigned_column_names.insert(name).second)
|
||||
{
|
||||
size_t i = 1;
|
||||
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
|
||||
++i;
|
||||
|
||||
name = name + "_" + toString(i);
|
||||
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
|
||||
expr->setAlias(name);
|
||||
|
||||
all_column_names.insert(name);
|
||||
assigned_column_names.insert(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
subquery_options.removeDuplicates();
|
||||
}
|
||||
|
||||
return std::make_shared<InterpreterSelectWithUnionQuery>(
|
||||
query, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns);
|
||||
return std::make_shared<InterpreterSelectWithUnionQuery>(query, subquery_context, subquery_options, required_source_columns);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,2 +1,7 @@
|
||||
1
|
||||
0
|
||||
0
|
||||
0 0
|
||||
0
|
||||
0 0
|
||||
0 0
|
||||
|
@ -35,49 +35,49 @@ GLOBAL INNER JOIN
|
||||
) USING dummy;
|
||||
|
||||
|
||||
-- SET asterisk_left_columns_only = 0;
|
||||
--
|
||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
||||
-- GLOBAL INNER JOIN
|
||||
-- (
|
||||
-- SELECT *, dummy
|
||||
-- FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
|
||||
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||
-- USING dummy
|
||||
-- ) USING dummy;
|
||||
--
|
||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
||||
-- GLOBAL INNER JOIN
|
||||
-- (
|
||||
-- SELECT *, t1.*, t2.*
|
||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
||||
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
||||
-- USING dummy
|
||||
-- ) USING dummy;
|
||||
--
|
||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
||||
-- GLOBAL INNER JOIN
|
||||
-- (
|
||||
-- SELECT *, dummy
|
||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
||||
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
||||
-- USING dummy
|
||||
-- ) USING dummy;
|
||||
--
|
||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
||||
-- GLOBAL INNER JOIN
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
|
||||
-- GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
|
||||
-- USING dummy
|
||||
-- ) USING dummy;
|
||||
--
|
||||
-- SELECT * FROM remote('127.0.0.2', system.one)
|
||||
-- GLOBAL INNER JOIN
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
|
||||
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||
-- USING dummy
|
||||
-- ) USING dummy;
|
||||
SET asterisk_left_columns_only = 0;
|
||||
|
||||
SELECT * FROM remote('127.0.0.2', system.one)
|
||||
GLOBAL INNER JOIN
|
||||
(
|
||||
SELECT *, dummy
|
||||
FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
|
||||
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||
USING dummy
|
||||
) USING dummy;
|
||||
|
||||
SELECT * FROM remote('127.0.0.2', system.one)
|
||||
GLOBAL INNER JOIN
|
||||
(
|
||||
SELECT *, t1.*, t2.*
|
||||
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||
USING dummy
|
||||
) USING dummy;
|
||||
|
||||
SELECT * FROM remote('127.0.0.2', system.one)
|
||||
GLOBAL INNER JOIN
|
||||
(
|
||||
SELECT *, dummy
|
||||
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||
USING dummy
|
||||
) USING dummy;
|
||||
|
||||
SELECT * FROM remote('127.0.0.2', system.one)
|
||||
GLOBAL INNER JOIN
|
||||
(
|
||||
SELECT *, dummy as other
|
||||
FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
|
||||
GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
|
||||
USING dummy
|
||||
) USING dummy;
|
||||
|
||||
SELECT * FROM remote('127.0.0.2', system.one)
|
||||
GLOBAL INNER JOIN
|
||||
(
|
||||
SELECT *, dummy, dummy as other
|
||||
FROM ( SELECT toUInt8(0) AS dummy ) t1
|
||||
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
|
||||
USING dummy
|
||||
) USING dummy;
|
||||
|
Loading…
Reference in New Issue
Block a user