2011-08-28 05:13:24 +00:00
|
|
|
#pragma once
|
|
|
|
|
2018-07-19 13:36:21 +00:00
|
|
|
#include <memory>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/QueryProcessingStage.h>
|
2019-03-18 10:36:13 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2019-05-17 14:34:25 +00:00
|
|
|
#include <DataStreams/IBlockStream_fwd.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Interpreters/ExpressionActions.h>
|
2018-02-23 06:00:48 +00:00
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
2019-03-18 12:05:51 +00:00
|
|
|
#include <Interpreters/IInterpreter.h>
|
2019-03-15 13:49:58 +00:00
|
|
|
#include <Interpreters/SelectQueryOptions.h>
|
2018-09-05 09:47:57 +00:00
|
|
|
#include <Storages/SelectQueryInfo.h>
|
2019-05-17 14:34:25 +00:00
|
|
|
#include <Storages/TableStructureLockHolder.h>
|
2019-12-10 23:18:24 +00:00
|
|
|
#include <Storages/ReadInOrderOptimizer.h>
|
2020-03-13 10:30:55 +00:00
|
|
|
#include <Interpreters/StorageID.h>
|
2011-08-28 05:13:24 +00:00
|
|
|
|
2019-03-26 18:28:37 +00:00
|
|
|
#include <Processors/QueryPipeline.h>
|
2019-10-27 18:12:40 +00:00
|
|
|
#include <Columns/FilterDescription.h>
|
2017-01-21 04:24:28 +00:00
|
|
|
|
|
|
|
namespace Poco { class Logger; }
|
|
|
|
|
2011-08-28 05:13:24 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-03-07 04:35:06 +00:00
|
|
|
struct SubqueryForSet;
|
2018-07-19 13:36:21 +00:00
|
|
|
class InterpreterSelectWithUnionQuery;
|
2015-05-06 23:35:37 +00:00
|
|
|
|
2018-11-08 15:43:14 +00:00
|
|
|
struct SyntaxAnalyzerResult;
|
|
|
|
using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
|
2015-05-06 23:35:37 +00:00
|
|
|
|
2019-03-15 13:49:58 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage.
|
2011-08-28 05:13:24 +00:00
|
|
|
*/
|
2019-03-18 12:05:51 +00:00
|
|
|
class InterpreterSelectQuery : public IInterpreter
|
2011-08-28 05:13:24 +00:00
|
|
|
{
|
|
|
|
public:
|
2017-11-05 17:48:50 +00:00
|
|
|
/**
|
|
|
|
* query_ptr
|
|
|
|
* - A query AST to interpret.
|
|
|
|
*
|
2018-02-27 19:00:55 +00:00
|
|
|
* required_result_column_names
|
2018-03-02 04:05:20 +00:00
|
|
|
* - calculate only the specified columns of the query and skip all the others
|
|
|
|
* - it is used to remove calculation (and reading) of unnecessary columns from subqueries.
|
|
|
|
* empty means - use all columns.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
InterpreterSelectQuery(
|
2017-05-23 18:24:43 +00:00
|
|
|
const ASTPtr & query_ptr_,
|
2017-04-01 07:20:54 +00:00
|
|
|
const Context & context_,
|
2019-03-15 13:49:58 +00:00
|
|
|
const SelectQueryOptions &,
|
2019-08-03 11:02:40 +00:00
|
|
|
const Names & required_result_column_names_ = Names{});
|
2018-07-17 13:09:33 +00:00
|
|
|
|
|
|
|
/// Read data not from the table specified in the query, but from the prepared source `input`.
|
|
|
|
InterpreterSelectQuery(
|
|
|
|
const ASTPtr & query_ptr_,
|
|
|
|
const Context & context_,
|
|
|
|
const BlockInputStreamPtr & input_,
|
2019-03-15 13:49:58 +00:00
|
|
|
const SelectQueryOptions & = {});
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-01-29 15:04:46 +00:00
|
|
|
/// Read data not from the table specified in the query, but from the prepared pipe `input`.
|
|
|
|
InterpreterSelectQuery(
|
|
|
|
const ASTPtr & query_ptr_,
|
|
|
|
const Context & context_,
|
|
|
|
Pipe input_pipe_,
|
|
|
|
const SelectQueryOptions & = {});
|
|
|
|
|
2018-07-18 12:17:48 +00:00
|
|
|
/// Read data not from the table specified in the query, but from the specified `storage_`.
|
|
|
|
InterpreterSelectQuery(
|
|
|
|
const ASTPtr & query_ptr_,
|
|
|
|
const Context & context_,
|
|
|
|
const StoragePtr & storage_,
|
2019-03-15 13:49:58 +00:00
|
|
|
const SelectQueryOptions & = {});
|
2018-07-18 12:17:48 +00:00
|
|
|
|
2018-06-03 20:39:06 +00:00
|
|
|
~InterpreterSelectQuery() override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-02-25 00:50:53 +00:00
|
|
|
/// Execute a query. Get the stream of blocks to read.
|
2017-04-01 07:20:54 +00:00
|
|
|
BlockIO execute() override;
|
|
|
|
|
2018-02-25 00:50:53 +00:00
|
|
|
/// Execute the query and return multiple streams for parallel processing.
|
2019-11-15 18:41:18 +00:00
|
|
|
BlockInputStreams executeWithMultipleStreams(QueryPipeline & parent_pipeline);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-26 18:28:37 +00:00
|
|
|
QueryPipeline executeWithProcessors() override;
|
|
|
|
/// This interpreter can always be executed with processors (see executeWithProcessors).
bool canExecuteWithProcessors() const override
{
    return true;
}
|
|
|
|
|
2019-11-11 01:11:32 +00:00
|
|
|
/// Reports the `ignore_limits` flag of the select query options.
bool ignoreLimits() const override
{
    return options.ignore_limits;
}
|
|
|
|
/// Reports the `ignore_quota` flag of the select query options.
bool ignoreQuota() const override
{
    return options.ignore_quota;
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Block getSampleBlock();
|
|
|
|
|
2018-02-25 06:34:20 +00:00
|
|
|
void ignoreWithTotals();
|
|
|
|
|
2019-02-11 19:53:55 +00:00
|
|
|
/// Returns the query AST held by this interpreter.
ASTPtr getQuery() const
{
    return query_ptr;
}
|
|
|
|
|
2020-03-16 18:45:39 +00:00
|
|
|
/// Returns max_streams: how many streams we ask the storage to produce,
/// and in how many threads further processing is done.
size_t getMaxStreams() const
{
    return max_streams;
}
|
|
|
|
|
2011-08-28 05:13:24 +00:00
|
|
|
private:
|
2018-07-17 13:09:33 +00:00
|
|
|
InterpreterSelectQuery(
|
|
|
|
const ASTPtr & query_ptr_,
|
|
|
|
const Context & context_,
|
|
|
|
const BlockInputStreamPtr & input_,
|
2020-01-29 15:04:46 +00:00
|
|
|
std::optional<Pipe> input_pipe,
|
2018-07-17 13:09:33 +00:00
|
|
|
const StoragePtr & storage_,
|
2019-03-15 13:49:58 +00:00
|
|
|
const SelectQueryOptions &,
|
|
|
|
const Names & required_result_column_names = {});
|
2018-07-17 13:09:33 +00:00
|
|
|
|
2019-03-15 16:14:13 +00:00
|
|
|
/// Views the stored query AST (query_ptr) as an ASTSelectQuery.
ASTSelectQuery & getSelectQuery()
{
    ASTSelectQuery & select_query = query_ptr->as<ASTSelectQuery &>();
    return select_query;
}
|
2019-03-12 14:07:02 +00:00
|
|
|
|
2020-02-05 16:42:27 +00:00
|
|
|
Block getSampleBlockImpl(bool try_move_to_prewhere);
|
2018-07-17 13:09:33 +00:00
|
|
|
|
2018-02-21 03:26:06 +00:00
|
|
|
struct Pipeline
|
|
|
|
{
|
|
|
|
/** Streams of data.
|
|
|
|
* The source data streams are produced in the executeFetchColumns function.
|
|
|
|
* Then they are converted (wrapped in other streams) using the `execute*` functions,
|
|
|
|
* to get the whole pipeline running the query.
|
|
|
|
*/
|
|
|
|
BlockInputStreams streams;
|
|
|
|
|
|
|
|
/** When executing FULL or RIGHT JOIN, there will be a data stream from which you can read "not joined" rows.
|
|
|
|
* It has a special meaning, since reading from it should be done after reading from the main streams.
|
2018-03-02 04:05:20 +00:00
|
|
|
* It is appended to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream.
|
2018-02-21 03:26:06 +00:00
|
|
|
*/
|
|
|
|
BlockInputStreamPtr stream_with_non_joined_data;
|
2019-04-12 17:04:38 +00:00
|
|
|
bool union_stream = false;
|
2018-02-21 03:26:06 +00:00
|
|
|
|
2020-03-15 21:22:55 +00:00
|
|
|
/// Cache value of InterpreterSelectQuery::max_streams
|
|
|
|
size_t max_threads = 1;
|
|
|
|
|
2018-02-21 03:26:06 +00:00
|
|
|
BlockInputStreamPtr & firstStream() { return streams.at(0); }
|
|
|
|
|
|
|
|
template <typename Transform>
|
2019-03-29 20:31:06 +00:00
|
|
|
void transform(Transform && transformation)
|
2018-02-21 03:26:06 +00:00
|
|
|
{
|
|
|
|
for (auto & stream : streams)
|
2019-03-29 20:31:06 +00:00
|
|
|
transformation(stream);
|
2018-02-21 03:26:06 +00:00
|
|
|
|
|
|
|
if (stream_with_non_joined_data)
|
2019-03-29 20:31:06 +00:00
|
|
|
transformation(stream_with_non_joined_data);
|
2018-02-21 03:26:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool hasMoreThanOneStream() const
|
|
|
|
{
|
|
|
|
return streams.size() + (stream_with_non_joined_data ? 1 : 0) > 1;
|
|
|
|
}
|
2019-04-12 17:04:38 +00:00
|
|
|
|
2019-04-15 11:22:51 +00:00
|
|
|
/// Resulting stream is mix of other streams data. Distinct and/or order guaranties are broken.
|
2019-04-12 17:04:38 +00:00
|
|
|
bool hasMixedStreams() const
|
|
|
|
{
|
|
|
|
return hasMoreThanOneStream() || union_stream;
|
|
|
|
}
|
2018-02-21 03:26:06 +00:00
|
|
|
|
2019-04-03 11:21:38 +00:00
|
|
|
bool hasDelayedStream() const { return stream_with_non_joined_data != nullptr; }
|
|
|
|
bool initialized() const { return !streams.empty(); }
|
2020-03-15 21:22:55 +00:00
|
|
|
|
|
|
|
/// Compatibility with QueryPipeline (Processors)
|
|
|
|
void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; }
|
|
|
|
size_t getNumThreads() const { return max_threads; }
|
2019-04-03 11:21:38 +00:00
|
|
|
};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-04-03 11:21:38 +00:00
|
|
|
template <typename TPipeline>
|
2020-01-29 15:04:46 +00:00
|
|
|
void executeImpl(TPipeline & pipeline, const BlockInputStreamPtr & prepared_input, std::optional<Pipe> prepared_pipe, QueryPipeline & save_context_and_storage);
|
2018-02-23 06:00:48 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Different stages of query execution.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-02-28 04:51:09 +00:00
|
|
|
/// dry_run - don't read from table, use empty header block instead.
|
2018-02-28 01:29:55 +00:00
|
|
|
void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run);
|
2018-02-21 03:26:06 +00:00
|
|
|
|
2019-04-03 11:21:38 +00:00
|
|
|
template <typename TPipeline>
|
|
|
|
void executeFetchColumns(QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
|
2019-12-10 23:18:24 +00:00
|
|
|
const PrewhereInfoPtr & prewhere_info,
|
2019-11-15 18:41:18 +00:00
|
|
|
const Names & columns_to_remove_after_prewhere,
|
|
|
|
QueryPipeline & save_context_and_storage);
|
2018-02-26 21:00:42 +00:00
|
|
|
|
2018-09-05 09:47:57 +00:00
|
|
|
void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter);
|
2018-02-21 03:26:06 +00:00
|
|
|
void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
|
|
|
|
void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final);
|
2018-08-24 15:00:00 +00:00
|
|
|
void executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
|
2018-02-21 03:26:06 +00:00
|
|
|
void executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression);
|
2020-03-18 00:57:00 +00:00
|
|
|
static void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression);
|
2019-11-15 14:03:42 +00:00
|
|
|
void executeOrder(Pipeline & pipeline, InputSortingInfoPtr sorting_info);
|
2019-04-21 16:16:25 +00:00
|
|
|
void executeWithFill(Pipeline & pipeline);
|
2018-02-21 03:26:06 +00:00
|
|
|
void executeMergeSorted(Pipeline & pipeline);
|
|
|
|
void executePreLimit(Pipeline & pipeline);
|
2019-10-03 12:02:30 +00:00
|
|
|
void executeUnion(Pipeline & pipeline, Block header);
|
2018-02-21 03:26:06 +00:00
|
|
|
void executeLimitBy(Pipeline & pipeline);
|
|
|
|
void executeLimit(Pipeline & pipeline);
|
2020-03-18 00:57:00 +00:00
|
|
|
static void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression);
|
2018-02-21 03:26:06 +00:00
|
|
|
void executeDistinct(Pipeline & pipeline, bool before_order, Names columns);
|
2018-02-28 02:32:34 +00:00
|
|
|
void executeExtremes(Pipeline & pipeline);
|
2020-02-10 19:55:13 +00:00
|
|
|
void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, const std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
|
2019-10-17 11:56:05 +00:00
|
|
|
void executeMergeSorted(Pipeline & pipeline, const SortDescription & sort_description, UInt64 limit);
|
2018-09-20 17:51:42 +00:00
|
|
|
|
2020-03-09 00:08:02 +00:00
|
|
|
void executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter);
|
2019-03-26 18:28:37 +00:00
|
|
|
void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
|
|
|
|
void executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final);
|
|
|
|
void executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
|
|
|
|
void executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
|
2020-03-18 00:57:00 +00:00
|
|
|
static void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
|
2019-11-15 14:03:42 +00:00
|
|
|
void executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr sorting_info);
|
2019-08-23 11:20:53 +00:00
|
|
|
void executeWithFill(QueryPipeline & pipeline);
|
2019-03-26 18:28:37 +00:00
|
|
|
void executeMergeSorted(QueryPipeline & pipeline);
|
2020-03-13 13:44:08 +00:00
|
|
|
void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset);
|
2019-03-26 18:28:37 +00:00
|
|
|
void executeLimitBy(QueryPipeline & pipeline);
|
|
|
|
void executeLimit(QueryPipeline & pipeline);
|
2020-03-18 00:57:00 +00:00
|
|
|
static void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
|
2019-03-26 18:28:37 +00:00
|
|
|
void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns);
|
|
|
|
void executeExtremes(QueryPipeline & pipeline);
|
2020-02-10 19:55:13 +00:00
|
|
|
void executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, const std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
|
2019-10-17 11:56:05 +00:00
|
|
|
void executeMergeSorted(QueryPipeline & pipeline, const SortDescription & sort_description, UInt64 limit);
|
2019-03-26 18:28:37 +00:00
|
|
|
|
2019-12-27 12:47:29 +00:00
|
|
|
String generateFilterActions(ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns = {}) const;
|
2019-12-05 11:42:13 +00:00
|
|
|
|
2019-08-19 18:10:40 +00:00
|
|
|
/// Add ConvertingBlockInputStream to specified header.
|
|
|
|
void unifyStreams(Pipeline & pipeline, Block header);
|
2018-11-06 11:44:45 +00:00
|
|
|
|
2018-09-20 17:51:42 +00:00
|
|
|
/// Selects which modifier executeRollupOrCube applies.
enum class Modificator
{
    ROLLUP, /// numeric value 0
    CUBE,   /// numeric value 1
};
|
2018-09-20 20:57:06 +00:00
|
|
|
|
2018-09-20 17:51:42 +00:00
|
|
|
void executeRollupOrCube(Pipeline & pipeline, Modificator modificator);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-26 18:28:37 +00:00
|
|
|
void executeRollupOrCube(QueryPipeline & pipeline, Modificator modificator);
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** If there is a SETTINGS section in the SELECT query, then apply settings from it.
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-06-02 21:37:28 +00:00
|
|
|
* Section SETTINGS - settings for a specific query.
|
|
|
|
* Normally, the settings can be passed in other ways, not inside the query.
|
|
|
|
* But the use of this section is justified if you need to set the settings for one subquery.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void initSettings();
|
|
|
|
|
2019-11-11 01:11:32 +00:00
|
|
|
SelectQueryOptions options;
|
2017-04-01 07:20:54 +00:00
|
|
|
ASTPtr query_ptr;
|
2019-11-15 18:41:18 +00:00
|
|
|
std::shared_ptr<Context> context;
|
2018-11-08 15:43:14 +00:00
|
|
|
SyntaxAnalyzerResultPtr syntax_analyzer_result;
|
2019-08-14 19:30:30 +00:00
|
|
|
std::unique_ptr<SelectQueryExpressionAnalyzer> query_analyzer;
|
2019-07-19 10:14:27 +00:00
|
|
|
SelectQueryInfo query_info;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-10-03 11:58:52 +00:00
|
|
|
/// Calculated in getSampleBlock; used later in readImpl.
|
2020-02-10 15:50:12 +00:00
|
|
|
ExpressionAnalysisResult analysis_result;
|
2019-10-03 11:58:52 +00:00
|
|
|
FilterInfoPtr filter_info;
|
|
|
|
|
2019-10-03 15:47:42 +00:00
|
|
|
QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns;
|
|
|
|
|
2017-07-03 21:04:10 +00:00
|
|
|
/// How many streams we ask for storage to produce, and in how many threads we will do further processing.
|
|
|
|
size_t max_streams = 1;
|
|
|
|
|
2018-07-19 13:36:21 +00:00
|
|
|
/// List of columns to read to execute the query.
|
|
|
|
Names required_columns;
|
|
|
|
/// Structure of query source (table, subquery, etc).
|
|
|
|
Block source_header;
|
|
|
|
/// Structure of query result.
|
|
|
|
Block result_header;
|
|
|
|
|
|
|
|
/// The subquery interpreter, if the data is read from a subquery.
|
|
|
|
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Table from where to read data, if not subquery.
|
2017-04-01 07:20:54 +00:00
|
|
|
StoragePtr storage;
|
2019-12-30 18:20:43 +00:00
|
|
|
StorageID table_id = StorageID::createEmpty(); /// Will be initialized if storage is not nullptr
|
2019-03-07 18:04:47 +00:00
|
|
|
TableStructureReadLockHolder table_lock;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-02-21 03:26:06 +00:00
|
|
|
/// Used when we read from prepared input, not table or subquery.
|
|
|
|
BlockInputStreamPtr input;
|
2020-01-29 15:04:46 +00:00
|
|
|
std::optional<Pipe> input_pipe;
|
2018-02-21 03:26:06 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Poco::Logger * log;
|
2011-08-28 05:13:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|