Merge pull request #25634 from vdimir/join-materialized-columns

Support materialized and aliased columns in joins
This commit is contained in:
alexey-milovidov 2021-07-09 03:18:49 +03:00 committed by GitHub
commit 7c17e2526d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 158 additions and 75 deletions

View File

@ -61,7 +61,7 @@ struct TableWithColumnNamesAndTypes
names.insert(col.name); names.insert(col.name);
} }
bool hasColumn(const String & name) const { return names.count(name); } bool hasColumn(const String & name) const { return names.contains(name); }
void addHiddenColumns(const NamesAndTypesList & addition) void addHiddenColumns(const NamesAndTypesList & addition)
{ {
@ -86,8 +86,6 @@ private:
names.insert(col.name); names.insert(col.name);
} }
private:
NameSet names; NameSet names;
}; };

View File

@ -11,7 +11,6 @@
#include <Parsers/DumpASTNode.h> #include <Parsers/DumpASTNode.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Interpreters/ArrayJoinAction.h> #include <Interpreters/ArrayJoinAction.h>
@ -813,7 +812,8 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain
} }
ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join);
chain.steps.push_back(std::make_unique<ExpressionActionsChain::JoinStep>(syntax->analyzed_join, table_join, step.getResultColumns())); chain.steps.push_back(std::make_unique<ExpressionActionsChain::JoinStep>(
syntax->analyzed_join, table_join, step.getResultColumns()));
chain.addStep(); chain.addStep();
return table_join; return table_join;
} }
@ -906,8 +906,8 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(
* in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
* - this function shows the expression JOIN _data1. * - this function shows the expression JOIN _data1.
*/ */
auto interpreter = interpretSubquery(join_element.table_expression, getContext(), original_right_columns, query_options); auto interpreter = interpretSubquery(
join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithAllColumns());
{ {
joined_plan = std::make_unique<QueryPlan>(); joined_plan = std::make_unique<QueryPlan>();
interpreter->buildQueryPlan(*joined_plan); interpreter->buildQueryPlan(*joined_plan);

View File

@ -1,6 +1,8 @@
#include <Interpreters/IdentifierSemantic.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Interpreters/IdentifierSemantic.h> #include <Interpreters/Context.h>
#include <Interpreters/StorageID.h> #include <Interpreters/StorageID.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
@ -280,7 +282,10 @@ IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuer
QueryAliasesNoSubqueriesVisitor(aliases).visit(with); QueryAliasesNoSubqueriesVisitor(aliases).visit(with);
QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select());
tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context); const auto & settings = context->getSettingsRef();
tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context,
settings.asterisk_include_alias_columns,
settings.asterisk_include_materialized_columns);
} }
std::optional<size_t> IdentifierMembershipCollector::getIdentsMembership(ASTPtr ast) const std::optional<size_t> IdentifierMembershipCollector::getIdentsMembership(ASTPtr ast) const

View File

@ -30,7 +30,6 @@
#include <Interpreters/JoinToSubqueryTransformVisitor.h> #include <Interpreters/JoinToSubqueryTransformVisitor.h>
#include <Interpreters/CrossToInnerJoinVisitor.h> #include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/TableJoin.h> #include <Interpreters/TableJoin.h>
#include <Interpreters/JoinSwitcher.h>
#include <Interpreters/JoinedTables.h> #include <Interpreters/JoinedTables.h>
#include <Interpreters/OpenTelemetrySpanLog.h> #include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryAliasesVisitor.h> #include <Interpreters/QueryAliasesVisitor.h>
@ -68,7 +67,6 @@
#include <Processors/Transforms/AggregatingTransform.h> #include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/ExpressionTransform.h> #include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/FilterTransform.h> #include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/JoiningTransform.h>
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h> #include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
@ -313,7 +311,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
ApplyWithSubqueryVisitor().visit(query_ptr); ApplyWithSubqueryVisitor().visit(query_ptr);
} }
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery()); JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols);
bool got_storage_from_query = false; bool got_storage_from_query = false;
if (!has_input && !storage) if (!has_input && !storage)

View File

@ -161,9 +161,10 @@ using RenameQualifiedIdentifiersVisitor = InDepthNodeVisitor<RenameQualifiedIden
} }
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query) JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query, bool include_all_columns_)
: context(context_) : context(context_)
, table_expressions(getTableExpressions(select_query)) , table_expressions(getTableExpressions(select_query))
, include_all_columns(include_all_columns_)
, left_table_expression(extractTableExpression(select_query, 0)) , left_table_expression(extractTableExpression(select_query, 0))
, left_db_and_table(getDatabaseAndTable(select_query, 0)) , left_db_and_table(getDatabaseAndTable(select_query, 0))
{} {}
@ -220,11 +221,13 @@ StoragePtr JoinedTables::getLeftTableStorage()
bool JoinedTables::resolveTables() bool JoinedTables::resolveTables()
{ {
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); const auto & settings = context->getSettingsRef();
bool include_alias_cols = include_all_columns || settings.asterisk_include_alias_columns;
bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns;
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols);
if (tables_with_columns.size() != table_expressions.size()) if (tables_with_columns.size() != table_expressions.size())
throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR);
const auto & settings = context->getSettingsRef();
if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1)
{ {
for (size_t i = 0; i < tables_with_columns.size(); ++i) for (size_t i = 0; i < tables_with_columns.size(); ++i)
@ -312,4 +315,11 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
return table_join; return table_join;
} }
void JoinedTables::reset(const ASTSelectQuery & select_query)
{
table_expressions = getTableExpressions(select_query);
left_table_expression = extractTableExpression(select_query, 0);
left_db_and_table = getDatabaseAndTable(select_query, 0);
}
} }

View File

@ -22,12 +22,9 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class JoinedTables class JoinedTables
{ {
public: public:
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query); JoinedTables(ContextPtr context, const ASTSelectQuery & select_query, bool include_all_columns_ = false);
void reset(const ASTSelectQuery & select_query) void reset(const ASTSelectQuery & select_query);
{
*this = JoinedTables(Context::createCopy(context), select_query);
}
StoragePtr getLeftTableStorage(); StoragePtr getLeftTableStorage();
bool resolveTables(); bool resolveTables();
@ -37,7 +34,6 @@ public:
std::shared_ptr<TableJoin> makeTableJoin(const ASTSelectQuery & select_query); std::shared_ptr<TableJoin> makeTableJoin(const ASTSelectQuery & select_query);
const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; } const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; }
TablesWithColumns moveTablesWithColumns() { return std::move(tables_with_columns); }
bool isLeftTableSubquery() const; bool isLeftTableSubquery() const;
bool isLeftTableFunction() const; bool isLeftTableFunction() const;
@ -51,6 +47,7 @@ private:
ContextPtr context; ContextPtr context;
std::vector<const ASTTableExpression *> table_expressions; std::vector<const ASTTableExpression *> table_expressions;
TablesWithColumns tables_with_columns; TablesWithColumns tables_with_columns;
const bool include_all_columns;
/// Legacy (duplicated left table values) /// Legacy (duplicated left table values)
ASTPtr left_table_expression; ASTPtr left_table_expression;

View File

@ -42,11 +42,14 @@ struct SelectQueryOptions
bool ignore_alias = false; bool ignore_alias = false;
bool is_internal = false; bool is_internal = false;
bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
bool with_all_cols = false; /// If true, asterisk expansion also includes MATERIALIZED and ALIAS columns, regardless of the asterisk_include_* settings
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, bool is_subquery_ = false) SelectQueryOptions(
QueryProcessingStage::Enum stage = QueryProcessingStage::Complete,
size_t depth = 0,
bool is_subquery_ = false)
: to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_) : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_)
{ {}
}
SelectQueryOptions copy() const { return *this; } SelectQueryOptions copy() const { return *this; }
@ -114,6 +117,12 @@ struct SelectQueryOptions
is_internal = value; is_internal = value;
return *this; return *this;
} }
/// Set the `with_all_cols` flag (defaults to enabling it).
/// Returns a reference to this object so the call can be chained, e.g. `copy().setWithAllColumns()`.
SelectQueryOptions & setWithAllColumns(bool value = true)
{
    this->with_all_cols = value;
    return *this;
}
}; };
} }

View File

@ -1,5 +1,4 @@
#include <Core/Settings.h> #include <Core/Settings.h>
#include <Core/Defines.h>
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Interpreters/TreeRewriter.h> #include <Interpreters/TreeRewriter.h>
@ -32,7 +31,6 @@
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromOStream.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
@ -510,14 +508,10 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
} }
/// Find the columns that are obtained by JOIN. /// Find the columns that are obtained by JOIN.
void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query, void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_join,
const TablesWithColumns & tables, const Aliases & aliases) const TablesWithColumns & tables, const Aliases & aliases)
{ {
const ASTTablesInSelectQueryElement * node = select_query.join(); assert(tables.size() >= 2);
if (!node || tables.size() < 2)
return;
const auto & table_join = node->table_join->as<ASTTableJoin &>();
if (table_join.using_expression_list) if (table_join.using_expression_list)
{ {
@ -896,9 +890,15 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
if (tables_with_columns.size() > 1) if (tables_with_columns.size() > 1)
{ {
result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns; const auto & right_table = tables_with_columns[1];
auto & cols_from_joined = result.analyzed_join->columns_from_joined_table;
cols_from_joined = right_table.columns;
/// The query can use materialized or aliased columns from the right joined table,
/// so we also request them from the right table.
cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end());
result.analyzed_join->deduplicateAndQualifyColumnNames( result.analyzed_join->deduplicateAndQualifyColumnNames(
source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix()); source_columns_set, right_table.table.getQualifiedNamePrefix());
} }
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
@ -932,7 +932,16 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
setJoinStrictness( setJoinStrictness(
*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); if (const auto * join_ast = select_query->join(); join_ast && tables_with_columns.size() >= 2)
{
auto & table_join_ast = join_ast->table_join->as<ASTTableJoin &>();
if (table_join_ast.using_expression_list && result.metadata_snapshot)
replaceAliasColumnsInQuery(table_join_ast.using_expression_list, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
if (table_join_ast.on_expression && result.metadata_snapshot)
replaceAliasColumnsInQuery(table_join_ast.on_expression, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
collectJoinedColumns(*result.analyzed_join, table_join_ast, tables_with_columns, result.aliases);
}
result.aggregates = getAggregates(query, *select_query); result.aggregates = getAggregates(query, *select_query);
result.window_function_asts = getWindowFunctions(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query);

View File

@ -113,50 +113,42 @@ static NamesAndTypesList getColumnsFromTableExpression(
return names_and_type_list; return names_and_type_list;
} }
NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context) TablesWithColumns getDatabaseAndTablesWithColumns(
{ const ASTTableExprConstPtrs & table_expressions,
NamesAndTypesList materialized; ContextPtr context,
NamesAndTypesList aliases; bool include_alias_cols,
NamesAndTypesList virtuals; bool include_materialized_cols)
return getColumnsFromTableExpression(table_expression, context, materialized, aliases, virtuals);
}
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, ContextPtr context)
{ {
TablesWithColumns tables_with_columns; TablesWithColumns tables_with_columns;
if (!table_expressions.empty()) String current_database = context->getCurrentDatabase();
for (const ASTTableExpression * table_expression : table_expressions)
{ {
String current_database = context->getCurrentDatabase(); NamesAndTypesList materialized;
bool include_alias_cols = context->getSettingsRef().asterisk_include_alias_columns; NamesAndTypesList aliases;
bool include_materialized_cols = context->getSettingsRef().asterisk_include_materialized_columns; NamesAndTypesList virtuals;
NamesAndTypesList names_and_types = getColumnsFromTableExpression(
*table_expression, context, materialized, aliases, virtuals);
for (const ASTTableExpression * table_expression : table_expressions) removeDuplicateColumns(names_and_types);
tables_with_columns.emplace_back(
DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types);
auto & table = tables_with_columns.back();
table.addHiddenColumns(materialized);
table.addHiddenColumns(aliases);
table.addHiddenColumns(virtuals);
if (include_alias_cols)
{ {
NamesAndTypesList materialized; table.addAliasColumns(aliases);
NamesAndTypesList aliases; }
NamesAndTypesList virtuals;
NamesAndTypesList names_and_types = getColumnsFromTableExpression(*table_expression, context, materialized, aliases, virtuals);
removeDuplicateColumns(names_and_types); if (include_materialized_cols)
{
tables_with_columns.emplace_back( table.addMaterializedColumns(materialized);
DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types);
auto & table = tables_with_columns.back();
table.addHiddenColumns(materialized);
table.addHiddenColumns(aliases);
table.addHiddenColumns(virtuals);
if (include_alias_cols)
{
table.addAliasColumns(aliases);
}
if (include_materialized_cols)
{
table.addMaterializedColumns(materialized);
}
} }
} }

View File

@ -10,13 +10,17 @@ namespace DB
struct ASTTableExpression; struct ASTTableExpression;
class ASTSelectQuery; class ASTSelectQuery;
using ASTTableExprConstPtrs = std::vector<const ASTTableExpression *>;
NameSet removeDuplicateColumns(NamesAndTypesList & columns); NameSet removeDuplicateColumns(NamesAndTypesList & columns);
std::vector<const ASTTableExpression *> getTableExpressions(const ASTSelectQuery & select_query); ASTTableExprConstPtrs getTableExpressions(const ASTSelectQuery & select_query);
const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number); const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number);
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number); ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context); TablesWithColumns getDatabaseAndTablesWithColumns(
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, ContextPtr context); const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols);
} }

View File

@ -0,0 +1,24 @@
2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2
-
2020-02-02 13:00:00 fact2 t1_val2 2020-02-02 2020-02-05 13:00:00 fact2 t1_val2 2020-02-05
-
2020-01-01 2020-01-01
2020-02-02 2020-02-05
-
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
-
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
-
2020-01-01 12:00:00 fact1 t1_val1 2019-01-01 12:00:00 fact4 t2_val2
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
2020-01-01 13:00:00 fact3 t1_val3 2019-01-01 12:00:00 fact4 t2_val2
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
-
2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2
-
fact1t1_val1 fact1t2_val2
fact2t1_val2 fact2t1_val2
-
2020-02-02 13:00:00 2020-02-05 13:00:00

View File

@ -0,0 +1,37 @@
-- Test: MATERIALIZED and ALIAS columns of both joined tables can be used in JOIN queries
-- (in the SELECT list, in WHERE, and as ON / USING join keys).
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
-- Left table: `dt` and `dt1` are MATERIALIZED from `time`, `aliascol1` is an ALIAS (foo || dimension_1).
CREATE TABLE t1 (
time DateTime, foo String, dimension_1 String,
dt Date MATERIALIZED toDate(time),
dt1 Date MATERIALIZED toDayOfYear(time),
aliascol1 ALIAS foo || dimension_1
) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, foo);
-- Right table: same layout; `dt` has the same name as in t1, while `dt2` / `aliascol2` are table-specific.
CREATE TABLE t2 (
time DateTime, bar String, dimension_2 String,
dt Date MATERIALIZED toDate(time),
dt2 Date MATERIALIZED toDayOfYear(time),
aliascol2 ALIAS bar || dimension_2
) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, bar);
-- Only the ordinary columns are inserted; the hidden columns are derived from them.
INSERT INTO t1 VALUES ('2020-01-01 12:00:00', 'fact1', 't1_val1'), ('2020-02-02 13:00:00', 'fact2', 't1_val2'), ('2020-01-01 13:00:00', 'fact3', 't1_val3');
INSERT INTO t2 VALUES ('2020-01-01 12:00:00', 'fact1', 't2_val2'), ('2020-02-05 13:00:00', 'fact2', 't1_val2'), ('2019-01-01 12:00:00', 'fact4', 't2_val2');
-- Materialized column of the right table used only in WHERE; `*` still returns just the ordinary columns.
SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01';
-- Each `SELECT '-'` prints a separator line between result sets.
SELECT '-';
-- Materialized columns requested explicitly next to `t1.*` / `t2.*`.
SELECT t1.*, t1.dt, t2.*, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01';
SELECT '-';
-- Only the materialized columns of both tables in the SELECT list.
SELECT t1.dt, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.dt;
SELECT '-';
-- Same-named materialized column as the join key in ON.
SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time;
SELECT '-';
-- The same join expressed with USING on the materialized column.
SELECT * FROM t1 ALL JOIN t2 USING (dt) ORDER BY t1.time, t2.time;
SELECT '-';
-- Join keys are materialized columns with different names in each table.
SELECT * FROM t1 JOIN t2 ON t1.dt1 = t2.dt2 ORDER BY t1.time, t2.time;
SELECT '-';
-- ALIAS column of the right table used in WHERE.
SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.aliascol2 == 'fact2t1_val2';
SELECT '-';
-- ALIAS columns of both tables in the SELECT list.
SELECT t1.aliascol1, t2.aliascol2 FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.time, t2.time;
SELECT '-';
-- ALIAS columns used as the join keys.
SELECT t1.time, t2.time FROM t1 JOIN t2 ON t1.aliascol1 = t2.aliascol2 ORDER BY t1.time, t2.time;