some more refactoring

This commit is contained in:
Artem Zuikov 2020-03-03 17:25:45 +03:00
parent 7189296e88
commit 5829b51c6b
5 changed files with 51 additions and 72 deletions

View File

@ -151,7 +151,7 @@ String InterpreterSelectQuery::generateFilterActions(ExpressionActionsPtr & acti
table_expr->children.push_back(table_expr->database_and_table_name);
/// Using separate expression analyzer to prevent any possible alias injection
auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, storage->getColumns().getAll());
auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage));
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context);
actions = analyzer.simpleSelectActions();
@ -266,7 +266,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression, getSubqueryContext(*context), options.subquery(), required_columns);
left_table_expression, getSubqueryContext(*context), options.subquery());
source_header = interpreter_subquery->getSampleBlock();
}
@ -295,7 +295,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
auto analyze = [&] (bool try_move_to_prewhere = true)
{
syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect(
query_ptr, source_header.getNamesAndTypesList(), storage, options, joined_tables, required_result_column_names);
query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage),
options, joined_tables.tablesWithColumns(), required_result_column_names);
/// Save scalar subqueries' results in the query context
if (context->hasQueryContext())
@ -333,8 +334,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression,
getSubqueryContext(*context),
options.subquery(),
required_columns);
options.subquery());
}
}

View File

@ -88,14 +88,6 @@ StoragePtr JoinedTables::getLeftTableStorage(Context & context)
return storage;
}
/// Returns the column list of the table at index 1 of tables_with_columns.
/// When fewer than two tables are present, a reference to a shared empty list is returned instead.
const NamesAndTypesList & JoinedTables::secondTableColumns() const
{
    /// Function-local static, so the returned reference stays valid after the call.
    static const NamesAndTypesList no_columns;

    if (tables_with_columns.size() <= 1)
        return no_columns;

    return tables_with_columns[1].columns;
}
void JoinedTables::resolveTables(const Context & context, StoragePtr storage)
{
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);

View File

@ -32,7 +32,6 @@ public:
void resolveTables(const Context & context, const NamesAndTypesList & source_columns);
const std::vector<TableWithColumnNamesAndTypes> & tablesWithColumns() const { return tables_with_columns; }
const NamesAndTypesList & secondTableColumns() const;
bool isLeftTableSubquery() const;
bool isLeftTableFunction() const;

View File

@ -82,24 +82,6 @@ using CustomizeFunctionsMatcher = OneTypeMatcher<CustomizeFunctionsData>;
using CustomizeFunctionsVisitor = InDepthNodeVisitor<CustomizeFunctionsMatcher, true>;
/// Extend source_columns with the columns described by `columns`.
/// The result of getAllPhysical() is always added; with add_virtuals set,
/// the results of getAliases() and getVirtuals() are appended after it, in that order.
void collectSourceColumns(const ColumnsDescription & columns, NamesAndTypesList & source_columns, bool add_virtuals)
{
    auto physical = columns.getAllPhysical();

    /// An empty destination simply takes over the freshly built list; otherwise append to it.
    if (!source_columns.empty())
        source_columns.insert(source_columns.end(), physical.begin(), physical.end());
    else
        source_columns.swap(physical);

    if (!add_virtuals)
        return;

    const auto & aliases = columns.getAliases();
    const auto & virtuals = columns.getVirtuals();
    source_columns.insert(source_columns.end(), aliases.begin(), aliases.end());
    source_columns.insert(source_columns.end(), virtuals.begin(), virtuals.end());
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
/// Expand asterisks and qualified asterisks with column names.
/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer.
@ -616,20 +598,32 @@ std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQu
}
/// Add columns from storage to source_columns list. Deduplicate resulted list.
void SyntaxAnalyzerResult::collectSourceColumns(bool add_virtuals)
{
if (storage)
{
const ColumnsDescription & columns = storage->getColumns();
auto columns_from_storage = add_virtuals ? columns.getAll() : columns.getAllPhysical();
if (source_columns.empty())
source_columns.swap(columns_from_storage);
else
source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end());
}
source_columns_set = removeDuplicateColumns(source_columns);
}
/// Calculate which columns are required to execute the expression.
/// Then, delete all other columns from the list of available columns.
/// After execution, columns will only contain the list of columns needed to read from the table.
void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns)
void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query)
{
/// We calculate required_source_columns with source_columns modifications and swap them on exit
required_source_columns = source_columns;
if (!additional_source_columns.empty())
{
source_columns.insert(source_columns.end(), additional_source_columns.begin(), additional_source_columns.end());
removeDuplicateColumns(source_columns);
}
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(query);
@ -787,10 +781,9 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
ASTPtr & query,
const NamesAndTypesList & source_columns,
StoragePtr storage,
SyntaxAnalyzerResult && result,
const SelectQueryOptions & select_options,
const JoinedTables & joined_tables,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
const Names & required_result_columns) const
{
auto * select_query = query->as<ASTSelectQuery>();
@ -802,15 +795,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
const auto & settings = context.getSettingsRef();
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns;
const NameSet & source_columns_set = result.source_columns_set;
result.analyzed_join = std::make_shared<AnalyzedJoin>(settings, context.getTemporaryVolume());
if (storage)
collectSourceColumns(storage->getColumns(), result.source_columns, true);
NameSet source_columns_set = removeDuplicateColumns(result.source_columns);
if (remove_duplicates)
renameDuplicatedColumns(select_query);
@ -818,19 +805,18 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
replaceJoinedTable(*select_query);
/// TODO: Remove unneeded conversion
std::vector<TableWithColumnNames> tables_with_columns;
for (const auto & table : joined_tables.tablesWithColumns())
tables_with_columns.emplace_back(table.removeTypes());
std::vector<TableWithColumnNames> tables_with_column_names;
for (const auto & table : tables_with_columns)
tables_with_column_names.emplace_back(table.removeTypes());
result.analyzed_join->columns_from_joined_table = joined_tables.secondTableColumns();
if (result.analyzed_join->columns_from_joined_table.size())
if (tables_with_columns.size() > 1)
{
result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns;
result.analyzed_join->deduplicateAndQualifyColumnNames(
source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix());
}
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_column_names);
/// Rewrite IN and/or JOIN for distributed tables according to distributed_product_mode setting.
InJoinSubqueriesPreprocessor(context).visit(query);
@ -853,7 +839,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query);
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_column_names, settings).optimize(*select_query);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
@ -872,11 +858,11 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_column_names, result.aliases);
}
result.aggregates = getAggregates(query, *select_query);
result.collectUsedColumns(query, {});
result.collectUsedColumns(query);
return std::make_shared<const SyntaxAnalyzerResult>(result);
}
@ -887,13 +873,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy
const auto & settings = context.getSettingsRef();
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns;
if (storage)
collectSourceColumns(storage->getColumns(), result.source_columns, false);
removeDuplicateColumns(result.source_columns);
SyntaxAnalyzerResult result(source_columns, storage, false);
normalize(query, result.aliases, settings);
@ -903,7 +883,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif);
assertNoAggregates(query, "in wrong place");
result.collectUsedColumns(query, {});
result.collectUsedColumns(query);
return std::make_shared<const SyntaxAnalyzerResult>(result);
}

View File

@ -4,7 +4,7 @@
#include <Core/NamesAndTypes.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/JoinedTables.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Storages/IStorage_fwd.h>
namespace DB
@ -23,6 +23,7 @@ struct SyntaxAnalyzerResult
std::shared_ptr<AnalyzedJoin> analyzed_join;
NamesAndTypesList source_columns;
NameSet source_columns_set; /// Set of names of source_columns.
/// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
NamesAndTypesList required_source_columns;
@ -50,7 +51,15 @@ struct SyntaxAnalyzerResult
bool maybe_optimize_trivial_count = false;
void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns);
/// Build a result from an initial column list and an optional storage.
/// Copies source_columns_ and storage_ into the corresponding members, then calls
/// collectSourceColumns(add_virtuals) to finish initializing the source column data.
/// NOTE(review): the constructor is callable with a single argument and is not `explicit`,
/// so a NamesAndTypesList converts implicitly - confirm this is intended.
SyntaxAnalyzerResult(const NamesAndTypesList & source_columns_, StoragePtr storage_ = {}, bool add_virtuals = true)
: storage(storage_)
, source_columns(source_columns_)
{
collectSourceColumns(add_virtuals);
}
void collectSourceColumns(bool add_virtuals);
void collectUsedColumns(const ASTPtr & query);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
const Scalars & getScalars() const { return scalars; }
};
@ -82,10 +91,9 @@ public:
/// Analyze and rewrite select query
SyntaxAnalyzerResultPtr analyzeSelect(
ASTPtr & query,
const NamesAndTypesList & source_columns,
StoragePtr storage = {},
SyntaxAnalyzerResult && result,
const SelectQueryOptions & select_options = {},
const JoinedTables & joined_tables = {},
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {},
const Names & required_result_columns = {}) const;
private: