Better UNION ALL: development #1947

This commit is contained in:
Alexey Milovidov 2018-02-27 22:00:55 +03:00
parent 5d27e43f96
commit 8d72ca25d5
4 changed files with 32 additions and 41 deletions

View File

@ -176,9 +176,11 @@ ExpressionAnalyzer::ExpressionAnalyzer(
void ExpressionAnalyzer::init()
{
select_query = typeid_cast<ASTSelectQuery *>(ast.get());
removeDuplicateColumns(source_columns);
select_query = typeid_cast<ASTSelectQuery *>(ast.get());
addAliasColumns();
translateQualifiedNames();
@ -195,9 +197,6 @@ void ExpressionAnalyzer::init()
/// Common subexpression elimination. Rewrite rules.
normalizeTree();
/// ALIAS and MATERIALIZED columns should not be substituted for ASTAsterisk, we will add them now, after normalizeTree.
addAliasAndMaterializedColumns();
DUMP(source_columns);
/// Executing scalar subqueries - replacing them with constant values.
@ -1046,8 +1045,18 @@ void ExpressionAnalyzer::normalizeTreeImpl(
if (typeid_cast<ASTAsterisk *>(asts[i].get()))
{
ASTs all_columns;
for (const auto & column_name_type : source_columns)
all_columns.emplace_back(std::make_shared<ASTIdentifier>(column_name_type.name));
if (storage)
{
/// If we select from a table, take only ordinary columns: neither MATERIALIZED nor ALIAS.
for (const auto & name_type : storage->getColumnsListNonMaterialized())
all_columns.emplace_back(std::make_shared<ASTIdentifier>(name_type.name));
}
else
{
for (const auto & name_type : source_columns)
all_columns.emplace_back(std::make_shared<ASTIdentifier>(name_type.name));
}
asts.erase(asts.begin() + i);
asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end());
@ -1129,7 +1138,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(
}
void ExpressionAnalyzer::addAliasAndMaterializedColumns()
void ExpressionAnalyzer::addAliasColumns()
{
if (!select_query)
return;
@ -1138,7 +1147,6 @@ void ExpressionAnalyzer::addAliasAndMaterializedColumns()
return;
source_columns.insert(std::end(source_columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
source_columns.insert(std::end(source_columns), std::begin(storage->materialized_columns), std::end(storage->materialized_columns));
}

View File

@ -251,8 +251,8 @@ private:
void makeSet(const ASTFunction * node, const Block & sample_block);
/// Adds a list of ALIAS and MATERIALIZED columns from the table.
void addAliasAndMaterializedColumns();
/// Adds a list of ALIAS columns from the table.
void addAliasColumns();
/// Replacing scalar subqueries with constant values.
void executeScalarSubqueries();

View File

@ -46,9 +46,6 @@
#include <Columns/Collator.h>
#include <Common/typeid_cast.h>
#include <Core/iostream_debug_helpers.h>
#include <Parsers/queryToString.h>
namespace ProfileEvents
{
@ -72,7 +69,7 @@ namespace ErrorCodes
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_column_names_,
const Names & required_result_column_names_,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
const BlockInputStreamPtr & input)
@ -84,7 +81,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
, input(input)
, log(&Logger::get("InterpreterSelectQuery"))
{
init(required_column_names_);
init(required_result_column_names_);
}
@ -103,7 +100,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu
InterpreterSelectQuery::~InterpreterSelectQuery() = default;
void InterpreterSelectQuery::init(const Names & required_column_names)
void InterpreterSelectQuery::init(const Names & required_result_column_names)
{
ProfileEvents::increment(ProfileEvents::SelectQuery);
@ -154,9 +151,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names)
table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__);
/// Source header should contain only columns that can be substituted for asterisk.
/// Materialized and alias columns will be processed by ExpressionAnalyzer.
source_header = storage->getSampleBlockNonMaterialized();
source_header = storage->getSampleBlock();
}
}
@ -164,7 +159,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names)
throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN);
query_analyzer = std::make_unique<ExpressionAnalyzer>(
query_ptr, context, storage, source_header.getNamesAndTypesList(), required_column_names, subquery_depth, !only_analyze);
query_ptr, context, storage, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze);
if (query.sample_size() && (input || !storage || !storage->supportsSampling()))
throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);
@ -515,9 +510,6 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz
QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline)
{
/// The subquery interpreter, used if the query reads from a subquery.
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
/// List of columns to read to execute the query.
Names required_columns = query_analyzer->getRequiredSourceColumns();
@ -538,8 +530,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
}
}
DUMP(alias_columns_required);
if (alias_columns_required)
{
/// We will create an expression to return all the requested columns, with the calculation of the required ALIAS columns.
@ -554,19 +544,16 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
required_columns_expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column));
}
DUMP(queryToString(required_columns_expr_list));
alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true);
DUMP(alias_actions->dumpActions());
/// The set of required columns could be added as a result of adding an action to calculate ALIAS.
required_columns = alias_actions->getRequiredColumns();
}
DUMP(required_columns);
}
/// The subquery interpreter, used if the query reads from a subquery.
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
auto query_table = query.table();
if (query_table && typeid_cast<ASTSelectWithUnionQuery *>(query_table.get()))
{
@ -725,7 +712,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR);
/// Aliases in table declaration.
if (alias_actions)
if (from_stage == QueryProcessingStage::FetchColumns && alias_actions)
{
pipeline.transform([&](auto & stream)
{

View File

@ -37,20 +37,16 @@ public:
* for INSERT SELECT, a value 1 is passed instead of 0.
*
* input
* - if given - read not from the table specified in the query, but from ready source.
* - if given - read not from the table specified in the query, but from a prepared source.
*
* required_column_names
* - delete all columns except the specified ones from the query - it is used to delete unnecessary columns from subqueries.
*
* table_column_names
* - the list of available columns of the table.
* Used, for example, with reference to `input`.
* required_result_column_names
* - calculate only the specified columns of the query result and skip all the others - it is used to avoid calculating unnecessary columns in subqueries.
*/
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_column_names = Names{},
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
const BlockInputStreamPtr & input = nullptr);
@ -111,7 +107,7 @@ private:
const ASTPtr & query_ptr_,
const Context & context_);
void init(const Names & required_column_names);
void init(const Names & required_result_column_names);
void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input);