Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-22 15:42:02 +00:00
Better UNION ALL: development #1947
commit 9ea0a603a0, parent c35727c7ed
dbms/src/Interpreters/ExpressionAnalyzer.cpp

@@ -60,6 +60,8 @@
 #include <DataTypes/DataTypeFunction.h>
 #include <Functions/FunctionsMiscellaneous.h>

+#include <Core/iostream_debug_helpers.h>
+
 namespace DB
 {
@@ -187,7 +189,12 @@ ExpressionAnalyzer::ExpressionAnalyzer(
         }
     }

-    removeDuplicateColumns(source_columns);
+    if (storage && source_columns.empty())
+        source_columns = storage->getSampleBlock().getNamesAndTypesList();
+    else
+        removeDuplicateColumns(source_columns);
+
+    DUMP(source_columns);

     addAliasColumns();
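The constructor hunk above is the analyzer-side half of the change: the source column list becomes optional and, when empty, is seeded from the storage's sample block (DUMP() is evidently debug output from the newly included Core/iostream_debug_helpers.h). A minimal standalone sketch of the fallback, with stand-in types instead of the real NamesAndTypesList/IStorage:

#include <set>
#include <string>
#include <vector>

// Stand-ins for the real ClickHouse types; illustration only.
using NamesAndTypesList = std::vector<std::string>;

struct Storage
{
    NamesAndTypesList sample_block_columns;
    NamesAndTypesList getSampleBlockColumns() const { return sample_block_columns; }
};

// Order-preserving dedupe, as removeDuplicateColumns() does for real columns.
void removeDuplicateColumns(NamesAndTypesList & columns)
{
    std::set<std::string> seen;
    NamesAndTypesList unique_columns;
    for (const auto & name : columns)
        if (seen.insert(name).second)
            unique_columns.push_back(name);
    columns.swap(unique_columns);
}

// New constructor behavior: an empty caller-provided list is no longer an
// error; it falls back to the table's sample block when a storage exists.
void initSourceColumns(NamesAndTypesList & source_columns, const Storage * storage)
{
    if (storage && source_columns.empty())
        source_columns = storage->getSampleBlockColumns();
    else
        removeDuplicateColumns(source_columns);
}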
@@ -2686,7 +2693,7 @@ void ExpressionAnalyzer::collectUsedColumns()
     if (required.empty())
         required.insert(ExpressionActions::getSmallestColumn(source_columns));

-    unknown_required_source_columns = required;
+    NameSet unknown_required_source_columns = required;

     for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();)
     {
@@ -2713,6 +2720,9 @@ void ExpressionAnalyzer::collectUsedColumns()
             ++it;
         }
     }
+
+    if (!unknown_required_source_columns.empty())
+        throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER);
 }

 void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAndTypesList & joined_columns_name_type)
@@ -2775,14 +2785,7 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd

 Names ExpressionAnalyzer::getRequiredSourceColumns() const
 {
-    if (!unknown_required_source_columns.empty())
-        throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER);
-
-    Names res;
-    for (const auto & column_name_type : source_columns)
-        res.push_back(column_name_type.name);
-
-    return res;
+    return source_columns.getNames();
 }
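Net effect of the ExpressionAnalyzer.cpp hunks: unknown_required_source_columns stops being a member, collectUsedColumns() throws on the first unknown identifier itself, and getRequiredSourceColumns() shrinks to a plain projection. A self-contained sketch of the new flow (simplified types; std::runtime_error stands in for DB::Exception):

#include <set>
#include <stdexcept>
#include <string>
#include <vector>

using Names = std::vector<std::string>;
using NameSet = std::set<std::string>;

// Sketch of collectUsedColumns(): prune unused source columns and fail
// immediately if a required column does not exist anywhere.
void collectUsedColumns(Names & source_columns, const NameSet & required)
{
    NameSet unknown_required_source_columns = required;  // now a local, not a member

    for (auto it = source_columns.begin(); it != source_columns.end();)
    {
        if (required.count(*it))
        {
            unknown_required_source_columns.erase(*it);
            ++it;
        }
        else
            it = source_columns.erase(it);  // column is never used: drop it
    }

    // Previously this check lived in getRequiredSourceColumns(); moving it
    // here reports bad identifiers during analysis instead of later.
    if (!unknown_required_source_columns.empty())
        throw std::runtime_error("Unknown identifier: " + *unknown_required_source_columns.begin());
}

// getRequiredSourceColumns() is now just a projection of what survived
// (the real code returns source_columns.getNames()).
Names getRequiredSourceColumns(const Names & source_columns)
{
    return source_columns;
}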
dbms/src/Interpreters/ExpressionAnalyzer.h

@@ -69,7 +69,7 @@ public:
         const ASTPtr & ast_,
         const Context & context_,
         const StoragePtr & storage_,
-        const NamesAndTypesList & source_columns_,
+        const NamesAndTypesList & source_columns_ = {},
         const Names & required_result_columns_ = {},
         size_t subquery_depth_ = 0,
         bool do_global_ = false,
@@ -146,9 +146,6 @@ private:
     Settings settings;
     size_t subquery_depth;

-    /// Columns that are mentioned in the expression, but were not specified in the constructor.
-    NameSet unknown_required_source_columns;
-
     /** Original columns.
      * First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted.
      */
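Defaulting source_columns_ in the header is what lets call sites shrink; the alias_actions call in InterpreterSelectQuery.cpp further down drops its explicit column list for exactly this reason. A toy, compilable illustration of the pattern (hypothetical Analyzer class, not the real signature):

#include <string>
#include <vector>

using NamesAndTypesList = std::vector<std::string>;

struct Analyzer
{
    // The defaulted argument lets callers omit the column list entirely,
    // deferring to whatever fallback the constructor implements.
    explicit Analyzer(const NamesAndTypesList & source_columns = {})
        : columns(source_columns.empty() ? NamesAndTypesList{"from_storage"} : source_columns)
    {
    }

    NamesAndTypesList columns;
};

int main()
{
    Analyzer old_style({"x", "y"});  // columns passed explicitly, as before
    Analyzer new_style;              // columns left to the constructor's fallback
    return old_style.columns.size() == 2 && new_style.columns.size() == 1 ? 0 : 1;
}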
dbms/src/Interpreters/InterpreterSelectQuery.cpp

@@ -19,7 +19,6 @@
 #include <DataStreams/CreatingSetsBlockInputStream.h>
 #include <DataStreams/MaterializingBlockInputStream.h>
 #include <DataStreams/ConcatBlockInputStream.h>
-#include <DataStreams/OneBlockInputStream.h>

 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTSelectWithUnionQuery.h>
@@ -113,53 +112,44 @@ void InterpreterSelectQuery::init(const Names & required_result_column_names)

     max_streams = settings.max_threads;

-    /// Read from prepared input.
+    const auto & table_expression = query.table();
+    NamesAndTypesList source_columns;
+
     if (input)
     {
-        source_header = input->getHeader();
+        /// Read from prepared input.
+        source_columns = input->getHeader().getNamesAndTypesList();
     }
-    else
+    else if (table_expression && typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get()))
     {
-        auto table_expression = query.table();
-
         /// Read from subquery.
-        if (table_expression && typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get()))
-        {
-            source_header = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression, context);
-        }
-        else
-        {
-            /// Read from table function.
-            if (table_expression && typeid_cast<const ASTFunction *>(table_expression.get()))
-            {
-                /// Get the table function
-                TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(
-                    typeid_cast<const ASTFunction *>(table_expression.get())->name, context);
-                /// Run it and remember the result
-                storage = table_function_ptr->execute(table_expression, context);
-            }
-            else
-            {
-                /// Read from table.
-                String database_name;
-                String table_name;
-
-                getDatabaseAndTableNames(database_name, table_name);
-
-                storage = context.getTable(database_name, table_name);
-            }
-
-            table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__);
-            source_header = storage->getSampleBlock();
-        }
-    }
+        source_columns = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression, context).getNamesAndTypesList();
+    }
+    else if (table_expression && typeid_cast<const ASTFunction *>(table_expression.get()))
+    {
+        /// Read from table function.

-    if (!source_header)
-        throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN);
+        TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(
+            typeid_cast<const ASTFunction *>(table_expression.get())->name, context);
+        /// Run it and remember the result
+        storage = table_function_ptr->execute(table_expression, context);
+    }
+    else
+    {
+        /// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
+        String database_name;
+        String table_name;
+
+        getDatabaseAndTableNames(database_name, table_name);
+
+        storage = context.getTable(database_name, table_name);
+    }
+
+    if (storage)
+        table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__);

     query_analyzer = std::make_unique<ExpressionAnalyzer>(
-        query_ptr, context, storage, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze);
+        query_ptr, context, storage, source_columns, required_result_column_names, subquery_depth, !only_analyze);

     if (query.sample_size() && (input || !storage || !storage->supportsSampling()))
         throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);
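The rewritten init() replaces the old nested if/else with a single four-way dispatch (prepared input, subquery, table function, plain table) and drops the source_header Block entirely: each branch either fills source_columns directly or resolves a storage for ExpressionAnalyzer to draw columns from, and the structure lock now happens only when a storage was actually resolved. A standalone sketch of the dispatch shape (stand-in types; the real code distinguishes the cases with typeid_cast on the AST):

#include <optional>
#include <string>
#include <vector>

using NamesAndTypesList = std::vector<std::string>;

// Stand-ins for the four places a SELECT can read from.
struct Input { NamesAndTypesList header; };
enum class TableExpressionKind { None, Subquery, TableFunction, Table };

// Sketch of the flattened dispatch in init(): exactly one branch fires,
// and either source_columns or the storage (here just its name) is filled.
void initSketch(const Input * input, TableExpressionKind kind,
                NamesAndTypesList & source_columns, std::optional<std::string> & storage)
{
    if (input)
        source_columns = input->header;               /// Read from prepared input.
    else if (kind == TableExpressionKind::Subquery)
        source_columns = {"subquery_result_column"};  /// Columns from the subquery's sample block.
    else if (kind == TableExpressionKind::TableFunction)
        storage = "table_function_result";            /// Run the table function, keep its storage.
    else
        storage = "resolved_table";                   /// Read from table (maybe implicit system.one).

    // Only lock a structure when a storage was actually resolved:
    // if (storage) table_lock = storage->lockStructure(false, ...);
}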
@@ -208,7 +198,7 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St

 Block InterpreterSelectQuery::getSampleBlock()
 {
     Pipeline pipeline;
-    executeImpl(pipeline, std::make_shared<OneBlockInputStream>(source_header));
+    executeImpl(pipeline, input, true);
     auto res = pipeline.firstStream()->getHeader();
     return res;
 }
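This is the commit's pivotal mechanism: getSampleBlock() no longer seeds the pipeline with a OneBlockInputStream over a cached source_header (hence the removed include); it dry-runs the interpreter's own planning via executeImpl(pipeline, input, true) and takes the header from the first stream, so the result header of each UNION ALL child comes from a real plan rather than a stored Block. A compilable toy model of "dry-run the plan to get the header":

#include <string>
#include <vector>

using Block = std::vector<std::string>;  // stand-in: a header is just column names here

struct Stream
{
    Block header;
    Block getHeader() const { return header; }
};

struct Pipeline
{
    std::vector<Stream> streams;
    Stream & firstStream() { return streams.front(); }
};

// Stand-in for executeImpl(pipeline, input, dry_run): in dry-run mode the
// plan is built (so headers exist) but no data would ever be read.
void executeImpl(Pipeline & pipeline, bool /*dry_run*/)
{
    pipeline.streams.push_back(Stream{{"a", "b"}});
}

// Model of the new getSampleBlock(): the header comes from a real plan,
// not from a Block stored at init() time.
Block getSampleBlock()
{
    Pipeline pipeline;
    executeImpl(pipeline, /*dry_run=*/true);
    return pipeline.firstStream().getHeader();
}

int main()
{
    return getSampleBlock().size() == 2 ? 0 : 1;
}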
@@ -223,7 +213,7 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co

 BlockIO InterpreterSelectQuery::execute()
 {
     Pipeline pipeline;
-    executeImpl(pipeline, input);
+    executeImpl(pipeline, input, false);
     executeUnion(pipeline);

     BlockIO res;
@@ -234,7 +224,7 @@ BlockIO InterpreterSelectQuery::execute()

 BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
 {
     Pipeline pipeline;
-    executeImpl(pipeline, input);
+    executeImpl(pipeline, input, false);
     return pipeline.streams;
 }
@@ -319,7 +309,7 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
 }


-void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input)
+void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run)
 {
     if (input)
         pipeline.streams.push_back(input);
@@ -335,7 +325,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
      */

     /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */
-    QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline);
+    QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline, dry_run);

     LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage));
@@ -508,7 +498,7 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz
     }
 }

-QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline)
+QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline, bool dry_run)
 {
     /// List of columns to read to execute the query.
     Names required_columns = query_analyzer->getRequiredSourceColumns();
@@ -544,7 +534,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
         required_columns_expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column));
     }

-    alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true);
+    alias_actions = ExpressionAnalyzer(required_columns_expr_list, context, storage).getActions(true);

     /// The set of required columns could be added as a result of adding an action to calculate ALIAS.
     required_columns = alias_actions->getRequiredColumns();
@@ -668,8 +658,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
             optimize_prewhere(*merge_tree);
     }

-    /// If there was no already prepared input.
-    if (pipeline.streams.empty())
+    if (!dry_run)
         pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams);

     if (pipeline.streams.empty())
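And this hunk is where dry_run actually pays off: executeFetchColumns() used to skip storage->read() only when a prepared input had already populated the pipeline; now it also skips it whenever the caller only wants headers, while the subsequent if (pipeline.streams.empty()) branch (its body lies outside this hunk) presumably supplies a header-only stream. A compilable toy showing that a dry run touches no data:

#include <cassert>
#include <string>
#include <vector>

using Block = std::vector<std::string>;  // stand-in header type

struct Storage
{
    int reads = 0;
    Block read() { ++reads; return {"a", "b"}; }      // real data fetch
    Block sampleBlock() const { return {"a", "b"}; }  // structure only, no I/O
};

struct Pipeline { std::vector<Block> streams; };

// Sketch of the new executeFetchColumns(pipeline, dry_run) tail: skip the
// storage read entirely when only the header is wanted.
void executeFetchColumns(Pipeline & pipeline, Storage & storage, bool dry_run)
{
    if (!dry_run)
        pipeline.streams.push_back(storage.read());

    if (pipeline.streams.empty())
        pipeline.streams.push_back(storage.sampleBlock());  // header-only stream
}

int main()
{
    Storage storage;

    Pipeline sample_pipeline;  // getSampleBlock() path
    executeFetchColumns(sample_pipeline, storage, /*dry_run=*/true);
    assert(storage.reads == 0);  // no data read just to learn the header

    Pipeline exec_pipeline;    // execute() path
    executeFetchColumns(exec_pipeline, storage, /*dry_run=*/false);
    assert(storage.reads == 1);
}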
dbms/src/Interpreters/InterpreterSelectQuery.h

@@ -33,7 +33,7 @@ public:
      * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
      *
      * subquery_depth
-     * - to control the restrictions on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
+     * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
      * for INSERT SELECT, a value 1 is passed instead of 0.
      *
      * input
@@ -109,7 +109,7 @@ private:

     void init(const Names & required_result_column_names);

-    void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input);
+    void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run);


     struct AnalysisResult
@@ -147,10 +147,10 @@ private:

     /// Different stages of query execution.

-    /// Fetch data from the table. Returns the stage to which the query was processed in Storage.
-    QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline);
+    void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run);

-    void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input);
+    /// Fetch data from the table. Returns the stage to which the query was processed in Storage.
+    QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline, bool dry_run);

     void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression);
     void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
@@ -182,7 +182,6 @@ private:
     QueryProcessingStage::Enum to_stage;
     size_t subquery_depth;
     std::unique_ptr<ExpressionAnalyzer> query_analyzer;
-    Block source_header;

     /// How many streams we ask for storage to produce, and in how many threads we will do further processing.
     size_t max_streams = 1;