mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #25634 from vdimir/join-materialized-columns
Support materialized and aliased columns in joins
This commit is contained in:
commit
7c17e2526d
@ -61,7 +61,7 @@ struct TableWithColumnNamesAndTypes
|
||||
names.insert(col.name);
|
||||
}
|
||||
|
||||
/// Returns true if `name` is present in the `names` set (a non-zero count converts to true).
bool hasColumn(const String & name) const { return names.count(name); }
|
||||
/// Returns true if `name` is present in the `names` set.
bool hasColumn(const String & name) const { return names.contains(name); }
|
||||
|
||||
void addHiddenColumns(const NamesAndTypesList & addition)
|
||||
{
|
||||
@ -86,8 +86,6 @@ private:
|
||||
names.insert(col.name);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
NameSet names;
|
||||
};
|
||||
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <Parsers/DumpASTNode.h>
|
||||
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
@ -813,7 +812,8 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain
|
||||
}
|
||||
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join);
|
||||
chain.steps.push_back(std::make_unique<ExpressionActionsChain::JoinStep>(syntax->analyzed_join, table_join, step.getResultColumns()));
|
||||
chain.steps.push_back(std::make_unique<ExpressionActionsChain::JoinStep>(
|
||||
syntax->analyzed_join, table_join, step.getResultColumns()));
|
||||
chain.addStep();
|
||||
return table_join;
|
||||
}
|
||||
@ -906,8 +906,8 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(
|
||||
* in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
|
||||
* - this function shows the expression JOIN _data1.
|
||||
*/
|
||||
auto interpreter = interpretSubquery(join_element.table_expression, getContext(), original_right_columns, query_options);
|
||||
|
||||
auto interpreter = interpretSubquery(
|
||||
join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithAllColumns());
|
||||
{
|
||||
joined_plan = std::make_unique<QueryPlan>();
|
||||
interpreter->buildQueryPlan(*joined_plan);
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/StorageID.h>
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -280,7 +282,10 @@ IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuer
|
||||
QueryAliasesNoSubqueriesVisitor(aliases).visit(with);
|
||||
QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select());
|
||||
|
||||
tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context);
|
||||
const auto & settings = context->getSettingsRef();
|
||||
tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context,
|
||||
settings.asterisk_include_alias_columns,
|
||||
settings.asterisk_include_materialized_columns);
|
||||
}
|
||||
|
||||
std::optional<size_t> IdentifierMembershipCollector::getIdentsMembership(ASTPtr ast) const
|
||||
|
@ -30,7 +30,6 @@
|
||||
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
|
||||
#include <Interpreters/CrossToInnerJoinVisitor.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
#include <Interpreters/JoinSwitcher.h>
|
||||
#include <Interpreters/JoinedTables.h>
|
||||
#include <Interpreters/OpenTelemetrySpanLog.h>
|
||||
#include <Interpreters/QueryAliasesVisitor.h>
|
||||
@ -68,7 +67,6 @@
|
||||
#include <Processors/Transforms/AggregatingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/FilterTransform.h>
|
||||
#include <Processors/Transforms/JoiningTransform.h>
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -313,7 +311,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
ApplyWithSubqueryVisitor().visit(query_ptr);
|
||||
}
|
||||
|
||||
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery());
|
||||
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols);
|
||||
|
||||
bool got_storage_from_query = false;
|
||||
if (!has_input && !storage)
|
||||
|
@ -161,9 +161,10 @@ using RenameQualifiedIdentifiersVisitor = InDepthNodeVisitor<RenameQualifiedIden
|
||||
|
||||
}
|
||||
|
||||
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query)
|
||||
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query, bool include_all_columns_)
|
||||
: context(context_)
|
||||
, table_expressions(getTableExpressions(select_query))
|
||||
, include_all_columns(include_all_columns_)
|
||||
, left_table_expression(extractTableExpression(select_query, 0))
|
||||
, left_db_and_table(getDatabaseAndTable(select_query, 0))
|
||||
{}
|
||||
@ -220,11 +221,13 @@ StoragePtr JoinedTables::getLeftTableStorage()
|
||||
|
||||
bool JoinedTables::resolveTables()
|
||||
{
|
||||
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
|
||||
const auto & settings = context->getSettingsRef();
|
||||
bool include_alias_cols = include_all_columns || settings.asterisk_include_alias_columns;
|
||||
bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns;
|
||||
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols);
|
||||
if (tables_with_columns.size() != table_expressions.size())
|
||||
throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1)
|
||||
{
|
||||
for (size_t i = 0; i < tables_with_columns.size(); ++i)
|
||||
@ -312,4 +315,11 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
|
||||
return table_join;
|
||||
}
|
||||
|
||||
/// Re-derives the per-query table state from `select_query`.
/// Reassigns exactly three members: `table_expressions`, `left_table_expression`
/// and `left_db_and_table` (the latter two are taken for table index 0).
/// No other member is assigned in this function.
void JoinedTables::reset(const ASTSelectQuery & select_query)
|
||||
{
|
||||
table_expressions = getTableExpressions(select_query);
|
||||
left_table_expression = extractTableExpression(select_query, 0);
|
||||
left_db_and_table = getDatabaseAndTable(select_query, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,12 +22,9 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
|
||||
class JoinedTables
|
||||
{
|
||||
public:
|
||||
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query);
|
||||
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query, bool include_all_columns_ = false);
|
||||
|
||||
void reset(const ASTSelectQuery & select_query)
|
||||
{
|
||||
*this = JoinedTables(Context::createCopy(context), select_query);
|
||||
}
|
||||
void reset(const ASTSelectQuery & select_query);
|
||||
|
||||
StoragePtr getLeftTableStorage();
|
||||
bool resolveTables();
|
||||
@ -37,7 +34,6 @@ public:
|
||||
std::shared_ptr<TableJoin> makeTableJoin(const ASTSelectQuery & select_query);
|
||||
|
||||
/// Read-only access to `tables_with_columns`.
const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; }
|
||||
/// Moves `tables_with_columns` out to the caller; the member is left in a moved-from state.
TablesWithColumns moveTablesWithColumns() { return std::move(tables_with_columns); }
|
||||
|
||||
bool isLeftTableSubquery() const;
|
||||
bool isLeftTableFunction() const;
|
||||
@ -51,6 +47,7 @@ private:
|
||||
ContextPtr context;
|
||||
std::vector<const ASTTableExpression *> table_expressions;
|
||||
TablesWithColumns tables_with_columns;
|
||||
const bool include_all_columns;
|
||||
|
||||
/// Legacy (duplicated left table values)
|
||||
ASTPtr left_table_expression;
|
||||
|
@ -42,11 +42,14 @@ struct SelectQueryOptions
|
||||
bool ignore_alias = false;
|
||||
bool is_internal = false;
|
||||
bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
|
||||
bool with_all_cols = false; /// when true, asterisk includes materialized and alias columns
|
||||
|
||||
SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, bool is_subquery_ = false)
|
||||
SelectQueryOptions(
|
||||
QueryProcessingStage::Enum stage = QueryProcessingStage::Complete,
|
||||
size_t depth = 0,
|
||||
bool is_subquery_ = false)
|
||||
: to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_)
|
||||
{
|
||||
}
|
||||
{}
|
||||
|
||||
/// Returns a copy of these options, so that setters can be chained on the copy
/// without modifying the original object.
SelectQueryOptions copy() const { return *this; }
|
||||
|
||||
@ -114,6 +117,12 @@ struct SelectQueryOptions
|
||||
is_internal = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Sets the `with_all_cols` flag to `value` (defaults to true).
/// Returns a reference to this object so that setter calls can be chained.
SelectQueryOptions & setWithAllColumns(bool value = true)
|
||||
{
|
||||
with_all_cols = value;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
@ -32,7 +31,6 @@
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
@ -510,14 +508,10 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
|
||||
}
|
||||
|
||||
/// Find the columns that are obtained by JOIN.
|
||||
void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query,
|
||||
void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_join,
|
||||
const TablesWithColumns & tables, const Aliases & aliases)
|
||||
{
|
||||
const ASTTablesInSelectQueryElement * node = select_query.join();
|
||||
if (!node || tables.size() < 2)
|
||||
return;
|
||||
|
||||
const auto & table_join = node->table_join->as<ASTTableJoin &>();
|
||||
assert(tables.size() >= 2);
|
||||
|
||||
if (table_join.using_expression_list)
|
||||
{
|
||||
@ -896,9 +890,15 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
|
||||
if (tables_with_columns.size() > 1)
|
||||
{
|
||||
result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns;
|
||||
const auto & right_table = tables_with_columns[1];
|
||||
auto & cols_from_joined = result.analyzed_join->columns_from_joined_table;
|
||||
cols_from_joined = right_table.columns;
|
||||
/// The query may use materialized or alias columns from the right joined table,
|
||||
/// so request them from the right table as well.
|
||||
cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end());
|
||||
|
||||
result.analyzed_join->deduplicateAndQualifyColumnNames(
|
||||
source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix());
|
||||
source_columns_set, right_table.table.getQualifiedNamePrefix());
|
||||
}
|
||||
|
||||
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
|
||||
@ -932,7 +932,16 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
setJoinStrictness(
|
||||
*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join);
|
||||
|
||||
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
|
||||
if (const auto * join_ast = select_query->join(); join_ast && tables_with_columns.size() >= 2)
|
||||
{
|
||||
auto & table_join_ast = join_ast->table_join->as<ASTTableJoin &>();
|
||||
if (table_join_ast.using_expression_list && result.metadata_snapshot)
|
||||
replaceAliasColumnsInQuery(table_join_ast.using_expression_list, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
|
||||
if (table_join_ast.on_expression && result.metadata_snapshot)
|
||||
replaceAliasColumnsInQuery(table_join_ast.on_expression, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
|
||||
|
||||
collectJoinedColumns(*result.analyzed_join, table_join_ast, tables_with_columns, result.aliases);
|
||||
}
|
||||
|
||||
result.aggregates = getAggregates(query, *select_query);
|
||||
result.window_function_asts = getWindowFunctions(query, *select_query);
|
||||
|
@ -113,50 +113,42 @@ static NamesAndTypesList getColumnsFromTableExpression(
|
||||
return names_and_type_list;
|
||||
}
|
||||
|
||||
/// Convenience overload: forwards to the five-argument getColumnsFromTableExpression
/// and returns its result. The `materialized`, `aliases` and `virtuals` lists passed
/// to that overload are locals of this function and are dropped on return.
NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context)
|
||||
{
|
||||
NamesAndTypesList materialized;
|
||||
NamesAndTypesList aliases;
|
||||
NamesAndTypesList virtuals;
|
||||
return getColumnsFromTableExpression(table_expression, context, materialized, aliases, virtuals);
|
||||
}
|
||||
|
||||
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, ContextPtr context)
|
||||
TablesWithColumns getDatabaseAndTablesWithColumns(
|
||||
const ASTTableExprConstPtrs & table_expressions,
|
||||
ContextPtr context,
|
||||
bool include_alias_cols,
|
||||
bool include_materialized_cols)
|
||||
{
|
||||
TablesWithColumns tables_with_columns;
|
||||
|
||||
if (!table_expressions.empty())
|
||||
String current_database = context->getCurrentDatabase();
|
||||
|
||||
for (const ASTTableExpression * table_expression : table_expressions)
|
||||
{
|
||||
String current_database = context->getCurrentDatabase();
|
||||
bool include_alias_cols = context->getSettingsRef().asterisk_include_alias_columns;
|
||||
bool include_materialized_cols = context->getSettingsRef().asterisk_include_materialized_columns;
|
||||
NamesAndTypesList materialized;
|
||||
NamesAndTypesList aliases;
|
||||
NamesAndTypesList virtuals;
|
||||
NamesAndTypesList names_and_types = getColumnsFromTableExpression(
|
||||
*table_expression, context, materialized, aliases, virtuals);
|
||||
|
||||
for (const ASTTableExpression * table_expression : table_expressions)
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
tables_with_columns.emplace_back(
|
||||
DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types);
|
||||
|
||||
auto & table = tables_with_columns.back();
|
||||
table.addHiddenColumns(materialized);
|
||||
table.addHiddenColumns(aliases);
|
||||
table.addHiddenColumns(virtuals);
|
||||
|
||||
if (include_alias_cols)
|
||||
{
|
||||
NamesAndTypesList materialized;
|
||||
NamesAndTypesList aliases;
|
||||
NamesAndTypesList virtuals;
|
||||
NamesAndTypesList names_and_types = getColumnsFromTableExpression(*table_expression, context, materialized, aliases, virtuals);
|
||||
table.addAliasColumns(aliases);
|
||||
}
|
||||
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
tables_with_columns.emplace_back(
|
||||
DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types);
|
||||
|
||||
auto & table = tables_with_columns.back();
|
||||
table.addHiddenColumns(materialized);
|
||||
table.addHiddenColumns(aliases);
|
||||
table.addHiddenColumns(virtuals);
|
||||
|
||||
if (include_alias_cols)
|
||||
{
|
||||
table.addAliasColumns(aliases);
|
||||
}
|
||||
|
||||
if (include_materialized_cols)
|
||||
{
|
||||
table.addMaterializedColumns(materialized);
|
||||
}
|
||||
if (include_materialized_cols)
|
||||
{
|
||||
table.addMaterializedColumns(materialized);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,13 +10,17 @@ namespace DB
|
||||
struct ASTTableExpression;
|
||||
class ASTSelectQuery;
|
||||
|
||||
using ASTTableExprConstPtrs = std::vector<const ASTTableExpression *>;
|
||||
|
||||
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
|
||||
|
||||
std::vector<const ASTTableExpression *> getTableExpressions(const ASTSelectQuery & select_query);
|
||||
ASTTableExprConstPtrs getTableExpressions(const ASTSelectQuery & select_query);
|
||||
|
||||
const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context);
|
||||
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, ContextPtr context);
|
||||
TablesWithColumns getDatabaseAndTablesWithColumns(
|
||||
const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols);
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,24 @@
|
||||
2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2
|
||||
-
|
||||
2020-02-02 13:00:00 fact2 t1_val2 2020-02-02 2020-02-05 13:00:00 fact2 t1_val2 2020-02-05
|
||||
-
|
||||
2020-01-01 2020-01-01
|
||||
2020-02-02 2020-02-05
|
||||
-
|
||||
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
|
||||
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
|
||||
-
|
||||
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
|
||||
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
|
||||
-
|
||||
2020-01-01 12:00:00 fact1 t1_val1 2019-01-01 12:00:00 fact4 t2_val2
|
||||
2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2
|
||||
2020-01-01 13:00:00 fact3 t1_val3 2019-01-01 12:00:00 fact4 t2_val2
|
||||
2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2
|
||||
-
|
||||
2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2
|
||||
-
|
||||
fact1t1_val1 fact1t2_val2
|
||||
fact2t1_val2 fact2t1_val2
|
||||
-
|
||||
2020-02-02 13:00:00 2020-02-05 13:00:00
|
@ -0,0 +1,37 @@
|
||||
-- Test script: JOINs between two tables that have MATERIALIZED and ALIAS columns.
-- `SELECT '-';` prints a separator line between the results of consecutive queries.
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
|
||||
-- Left table: three ordinary columns, two MATERIALIZED columns (dt, dt1) and one ALIAS column (aliascol1).
-- NOTE(review): dt1 stores toDayOfYear(time) in a Date column — presumably only to get a second
-- materialized column whose values differ from dt; confirm.
CREATE TABLE t1 (
|
||||
time DateTime, foo String, dimension_1 String,
|
||||
dt Date MATERIALIZED toDate(time),
|
||||
dt1 Date MATERIALIZED toDayOfYear(time),
|
||||
aliascol1 ALIAS foo || dimension_1
|
||||
) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, foo);
|
||||
|
||||
-- Right table: same layout as t1, with columns bar / dimension_2 / dt2 / aliascol2.
CREATE TABLE t2 (
|
||||
time DateTime, bar String, dimension_2 String,
|
||||
dt Date MATERIALIZED toDate(time),
|
||||
dt2 Date MATERIALIZED toDayOfYear(time),
|
||||
aliascol2 ALIAS bar || dimension_2
|
||||
) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, bar);
|
||||
|
||||
-- Three rows per table; only the ordinary columns are inserted.
INSERT INTO t1 VALUES ('2020-01-01 12:00:00', 'fact1', 't1_val1'), ('2020-02-02 13:00:00', 'fact2', 't1_val2'), ('2020-01-01 13:00:00', 'fact3', 't1_val3');
|
||||
INSERT INTO t2 VALUES ('2020-01-01 12:00:00', 'fact1', 't2_val2'), ('2020-02-05 13:00:00', 'fact2', 't1_val2'), ('2019-01-01 12:00:00', 'fact4', 't2_val2');
|
||||
|
||||
-- MATERIALIZED column of the right table (t2.dt) used only in WHERE, with `*` in the select list.
SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01';
|
||||
SELECT '-';
|
||||
-- MATERIALIZED columns of both tables selected explicitly next to t1.* and t2.*.
SELECT t1.*, t1.dt, t2.*, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01';
|
||||
SELECT '-';
|
||||
-- Only the MATERIALIZED dt columns of both tables are selected.
SELECT t1.dt, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.dt;
|
||||
SELECT '-';
|
||||
-- MATERIALIZED columns used as the JOIN ON key.
SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time;
|
||||
SELECT '-';
|
||||
-- Same key, expressed with USING.
SELECT * FROM t1 ALL JOIN t2 USING (dt) ORDER BY t1.time, t2.time;
|
||||
SELECT '-';
|
||||
-- JOIN ON MATERIALIZED columns that have different names in the two tables (dt1 / dt2).
SELECT * FROM t1 JOIN t2 ON t1.dt1 = t2.dt2 ORDER BY t1.time, t2.time;
|
||||
SELECT '-';
|
||||
-- ALIAS column of the right table used in WHERE.
SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.aliascol2 == 'fact2t1_val2';
|
||||
SELECT '-';
|
||||
-- ALIAS columns of both tables selected explicitly.
SELECT t1.aliascol1, t2.aliascol2 FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.time, t2.time;
|
||||
SELECT '-';
|
||||
-- ALIAS columns used as the JOIN ON key.
SELECT t1.time, t2.time FROM t1 JOIN t2 ON t1.aliascol1 = t2.aliascol2 ORDER BY t1.time, t2.time;
|
Loading…
Reference in New Issue
Block a user