// ClickHouse/src/Interpreters/MutationsInterpreter.h

#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/Context.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/MutationCommands.h>
namespace DB
{
/// Forward declarations: only pointers/references to these types are named at this point.
class Context;
class QueryPlan;
class QueryPipeline;

/// Owning pointer to a QueryPipeline.
using QueryPipelinePtr = std::unique_ptr<QueryPipeline>;

/// Return false if the data isn't going to be changed by mutations.
2020-06-17 11:52:19 +00:00
bool isStorageTouchedByMutations(
StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const std::vector<MutationCommand> & commands, Context context_copy);
2019-04-10 17:44:39 +00:00
/// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs)
/// to this data.
class MutationsInterpreter
{
public:
/// Storage to mutate, array of mutations commands and context. If you really want to execute mutation
/// use can_execute = true, in other cases (validation, amount of commands) it can be false
2020-06-17 11:52:19 +00:00
MutationsInterpreter(
StoragePtr storage_,
const StorageMetadataPtr & metadata_snapshot_,
MutationCommands commands_,
const Context & context_,
bool can_execute_);
2020-06-18 16:10:47 +00:00
void validate();
2019-08-05 08:36:41 +00:00
size_t evaluateCommandsSize();
2019-08-05 18:06:05 +00:00
/// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices.
2020-06-18 16:10:47 +00:00
BlockInputStreamPtr execute();
2019-08-05 18:06:05 +00:00
/// Only changed columns.
const Block & getUpdatedHeader() const;
2020-07-27 09:42:37 +00:00
/// Latest mutation stage affects all columns in storage
2020-07-26 14:21:57 +00:00
bool isAffectingAllColumns() const;
private:
ASTPtr prepare(bool dry_run);
2019-08-05 18:06:05 +00:00
struct Stage;
ASTPtr prepareInterpreterSelectQuery(std::vector<Stage> &prepared_stages, bool dry_run);
2020-09-15 17:13:13 +00:00
QueryPipelinePtr addStreamsForLaterStages(const std::vector<Stage> & prepared_stages, QueryPlan & plan) const;
std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const;
StoragePtr storage;
StorageMetadataPtr metadata_snapshot;
MutationCommands commands;
2020-03-27 15:10:27 +00:00
Context context;
bool can_execute;
ASTPtr mutation_ast;
/// We have to store interpreter because it use own copy of context
/// and some streams from execute method may use it.
std::unique_ptr<InterpreterSelectQuery> select_interpreter;
/// A sequence of mutation commands is executed as a sequence of stages. Each stage consists of several
/// filters, followed by updating values of some columns. Commands can reuse expressions calculated by the
/// previous commands in the same stage, but at the end of each stage intermediate columns are thrown away
/// (they may contain wrong values because the column values have been updated).
///
/// If an UPDATE command changes some columns that some MATERIALIZED columns depend on, a stage to
/// recalculate these columns is added.
///
/// Each stage has output_columns that contain columns that are changed at the end of that stage
/// plus columns needed for the next mutations.
///
/// First stage is special: it can contain only filters and is executed using InterpreterSelectQuery
/// to take advantage of table indexes (if there are any). It's necessary because all mutations have
/// `WHERE clause` part.
struct Stage
{
2019-08-03 11:02:40 +00:00
Stage(const Context & context_) : expressions_chain(context_) {}
2018-09-07 19:14:05 +00:00
ASTs filters;
std::unordered_map<String, ASTPtr> column_to_updated;
2020-07-26 14:21:57 +00:00
/// Contains columns that are changed by this stage, columns changed by
/// the previous stages and also columns needed by the next stages.
NameSet output_columns;
std::unique_ptr<ExpressionAnalyzer> analyzer;
/// A chain of actions needed to execute this stage.
/// First steps calculate filter columns for DELETEs (in the same order as in `filter_column_names`),
/// then there is (possibly) an UPDATE step, and finally a projection step.
ExpressionActionsChain expressions_chain;
Names filter_column_names;
2020-07-26 14:21:57 +00:00
2020-07-26 14:27:31 +00:00
/// Check that stage affects all storage columns
2020-07-26 14:21:57 +00:00
bool isAffectingAllColumns(const Names & storage_columns) const;
};
2019-08-05 18:06:05 +00:00
std::unique_ptr<Block> updated_header;
std::vector<Stage> stages;
bool is_prepared = false; /// Has the sequence of stages been prepared.
};
}