// ClickHouse/dbms/src/Interpreters/InterpreterSelectQuery.h
#pragma once
#include <Core/QueryProcessingStage.h>
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/ExpressionActions.h>
#include <DataStreams/IBlockInputStream.h>
/// Forward declaration: this header only stores a pointer to the logger.
namespace Poco { class Logger; }
namespace DB
{
/// Forward declarations: in this header these types are used only through
/// a smart pointer, a reference member and a reference parameter respectively.
class ExpressionAnalyzer;
class ASTSelectQuery;
struct SubqueryForSet;

/** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage.
2011-08-28 05:13:24 +00:00
*/
2015-06-18 02:11:05 +00:00
class InterpreterSelectQuery : public IInterpreter
2011-08-28 05:13:24 +00:00
{
public:
2017-06-02 21:37:28 +00:00
/** `to_stage`
* - the stage to which the query is to be executed. By default - till to the end.
* You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
*
* subquery_depth
2017-06-02 21:37:28 +00:00
* - to control the restrictions on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed.
*
* input
2017-06-02 21:37:28 +00:00
* - if given - read not from the table specified in the query, but from ready source.
*
* required_column_names
2017-06-02 21:37:28 +00:00
* - delete all columns except the specified ones from the query - it is used to delete unnecessary columns from subqueries.
*
* table_column_names
2017-06-02 21:37:28 +00:00
* - the list of available columns of the table.
* Used, for example, with reference to `input`.
*/
InterpreterSelectQuery(
2017-05-23 18:24:43 +00:00
const ASTPtr & query_ptr_,
const Context & context_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
BlockInputStreamPtr input = nullptr);
InterpreterSelectQuery(
2017-05-23 18:24:43 +00:00
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_column_names,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
BlockInputStreamPtr input = nullptr);
InterpreterSelectQuery(
2017-05-23 18:24:43 +00:00
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_column_names,
const NamesAndTypesList & table_column_names_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
BlockInputStreamPtr input = nullptr);
~InterpreterSelectQuery();
2017-06-02 21:37:28 +00:00
/** Execute a query, possibly part of UNION ALL chain.
* Get the stream of blocks to read
*/
BlockIO execute() override;
2017-06-02 21:37:28 +00:00
/** Execute the query without union of threads, if it is possible.
*/
const BlockInputStreams & executeWithoutUnion();
DataTypes getReturnTypes();
Block getSampleBlock();
static Block getSampleBlock(
2017-05-23 18:24:43 +00:00
const ASTPtr & query_ptr_,
const Context & context_);
2011-08-28 05:13:24 +00:00
private:
/**
2017-06-02 21:37:28 +00:00
* - Optimization if an object is created only to call getSampleBlock(): consider only the first SELECT of the UNION ALL chain, because
* the first SELECT is sufficient to determine the required columns.
*/
struct OnlyAnalyzeTag {};
InterpreterSelectQuery(
OnlyAnalyzeTag,
2017-05-23 18:24:43 +00:00
const ASTPtr & query_ptr_,
const Context & context_);
void init(BlockInputStreamPtr input, const Names & required_column_names = Names{});
void basicInit(BlockInputStreamPtr input);
void initQueryAnalyzer();
2017-06-02 21:37:28 +00:00
/// Execute one SELECT query from the UNION ALL chain.
void executeSingleQuery();
2017-06-02 21:37:28 +00:00
/** Leave only the necessary columns of the SELECT section in each query of the UNION ALL chain.
* However, if you use at least one DISTINCT in the chain, then all the columns are considered necessary,
* since otherwise DISTINCT would work differently.
*
* Always leave arrayJoin, because it changes number of rows.
*
* TODO If query doesn't have GROUP BY, but have aggregate functions,
* then leave at least one aggregate function,
* In order that fact of aggregation has not been lost.
*/
void rewriteExpressionList(const Names & required_column_names);
2017-06-02 21:37:28 +00:00
/// Does the request contain at least one asterisk?
bool hasAsterisk() const;
2017-06-02 21:37:28 +00:00
// Rename the columns of each query for the UNION ALL chain into the same names as in the first query.
void renameColumns();
2017-06-02 21:37:28 +00:00
/** From which table to read. With JOIN, the "left" table is returned.
*/
void getDatabaseAndTableNames(String & database_name, String & table_name);
2017-06-02 21:37:28 +00:00
/** Select from the list of columns any, better - with minimum size.
*/
String getAnyColumn();
2017-06-02 21:37:28 +00:00
/// Different stages of query execution.
2017-06-02 21:37:28 +00:00
/// Fetch data from the table. Returns the stage to which the query was processed in Storage.
QueryProcessingStage::Enum executeFetchColumns();
void executeWhere(ExpressionActionsPtr expression);
void executeAggregation(ExpressionActionsPtr expression, bool overflow_row, bool final);
void executeMergeAggregated(bool overflow_row, bool final);
void executeTotalsAndHaving(bool has_having, ExpressionActionsPtr expression, bool overflow_row);
void executeHaving(ExpressionActionsPtr expression);
void executeExpression(ExpressionActionsPtr expression);
void executeOrder();
void executeMergeSorted();
void executePreLimit();
void executeUnion();
void executeLimitBy();
void executeLimit();
void executeProjection(ExpressionActionsPtr expression);
void executeDistinct(bool before_order, Names columns);
void executeSubqueriesInSetsAndJoins(std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
template <typename Transform>
void transformStreams(Transform && transform);
bool hasNoData() const;
bool hasMoreThanOneStream() const;
void ignoreWithTotals();
2017-06-02 21:37:28 +00:00
/** If there is a SETTINGS section in the SELECT query, then apply settings from it.
*
2017-06-02 21:37:28 +00:00
* Section SETTINGS - settings for a specific query.
* Normally, the settings can be passed in other ways, not inside the query.
* But the use of this section is justified if you need to set the settings for one subquery.
*/
void initSettings();
ASTPtr query_ptr;
ASTSelectQuery & query;
Context context;
QueryProcessingStage::Enum to_stage;
size_t subquery_depth;
std::unique_ptr<ExpressionAnalyzer> query_analyzer;
NamesAndTypesList table_column_names;
2017-06-02 21:37:28 +00:00
/** Streams of data.
* The source data streams are produced in the executeFetchColumns function.
* Then they are converted (wrapped in other streams) using the `execute*` functions,
* to get the whole pipeline running the query.
*/
BlockInputStreams streams;
2017-06-02 21:37:28 +00:00
/** When executing FULL or RIGHT JOIN, there will be a data stream from which you can read "not joined" rows.
* It has a special meaning, since reading from it should be done after reading from the main streams.
* It is joined to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream.
*/
BlockInputStreamPtr stream_with_non_joined_data;
2017-06-02 21:37:28 +00:00
/// Is it the first SELECT query of the UNION ALL chain?
bool is_first_select_inside_union_all;
2017-06-02 21:37:28 +00:00
/// The object was created only for query analysis.
bool only_analyze = false;
2017-06-02 21:37:28 +00:00
/// The next SELECT query in the UNION ALL chain, if any.
std::unique_ptr<InterpreterSelectQuery> next_select_in_union_all;
2017-06-02 21:37:28 +00:00
/// Table from where to read data, if not subquery.
StoragePtr storage;
TableStructureReadLockPtr table_lock;
2017-06-02 21:37:28 +00:00
/// Do union of streams within a SELECT query?
bool union_within_single_query = false;
Poco::Logger * log;
2011-08-28 05:13:24 +00:00
};
}