#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
#include <QueryPipeline/SizeLimits.h>
#include <Storages/SelectQueryInfo.h>
#include <Interpreters/Aggregator.h>

namespace DB
{

struct GroupingSetsParams
{
    GroupingSetsParams() = default;

    GroupingSetsParams(Names used_keys_, Names missing_keys_) : used_keys(std::move(used_keys_)), missing_keys(std::move(missing_keys_)) { }

    Names used_keys;
    Names missing_keys;
};

using GroupingSetsParamsList = std::vector<GroupingSetsParams>;
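
/// Illustrative note (an assumption for clarity, not part of the upstream header): for a query like
///     SELECT a, b, count() FROM t GROUP BY GROUPING SETS ((a, b), (a))
/// the planner is expected to build one GroupingSetsParams per set, roughly
///     GroupingSetsParams(Names{"a", "b"}, Names{});   /// used_keys = {a, b}, missing_keys = {}
///     GroupingSetsParams(Names{"a"}, Names{"b"});     /// used_keys = {a},    missing_keys = {b}
/// where missing_keys are the GROUP BY keys absent from that particular set; they are filled with
/// default values (or with NULLs when group_by_use_nulls is enabled).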

Block appendGroupingSetColumn(Block header);
Block generateOutputHeader(const Block & input_header, const Names & keys, bool use_nulls);
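
/// Note (an assumption, since the definitions live in the .cpp file): appendGroupingSetColumn() is
/// expected to add the auxiliary `__grouping_set` column that records which grouping set produced
/// each row, and generateOutputHeader() to additionally make key columns Nullable when use_nulls is set.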

class AggregatingProjectionStep;

/// Aggregation. See AggregatingTransform.
class AggregatingStep : public ITransformingStep
{
public:
    AggregatingStep(
        const DataStream & input_stream_,
        Aggregator::Params params_,
        GroupingSetsParamsList grouping_sets_params_,
        bool final_,
        size_t max_block_size_,
        size_t aggregation_in_order_max_block_bytes_,
        size_t merge_threads_,
        size_t temporary_data_merge_threads_,
        bool storage_has_evenly_distributed_read_,
        bool group_by_use_nulls_,
        SortDescription sort_description_for_merging_,
        SortDescription group_by_sort_description_,
        bool should_produce_results_in_order_of_bucket_number_,
        bool memory_bound_merging_of_aggregation_results_enabled_,
        bool explicit_sorting_required_for_aggregation_in_order_);
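
    /// Hypothetical construction sketch (illustration only; the argument values below are
    /// assumptions, not taken from the repository):
    ///     auto aggregating_step = std::make_unique<AggregatingStep>(
    ///         query_plan.getCurrentDataStream(),
    ///         std::move(aggregator_params),
    ///         std::move(grouping_sets_params),
    ///         /* final_ */ true,
    ///         settings.max_block_size,
    ///         settings.aggregation_in_order_max_block_bytes,
    ///         /* merge_threads_ */ settings.max_threads,
    ///         /* temporary_data_merge_threads_ */ settings.aggregation_memory_efficient_merge_threads,
    ///         /* storage_has_evenly_distributed_read_ */ false,
    ///         settings.group_by_use_nulls,
    ///         std::move(sort_description_for_merging),
    ///         std::move(group_by_sort_description),
    ///         /* should_produce_results_in_order_of_bucket_number_ */ false,
    ///         /* memory_bound_merging_of_aggregation_results_enabled_ */ false,
    ///         /* explicit_sorting_required_for_aggregation_in_order_ */ false);
    ///     query_plan.addStep(std::move(aggregating_step));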

    static Block appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls);

    String getName() const override { return "Aggregating"; }

    void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;

    void describeActions(JSONBuilder::JSONMap & map) const override;

    void describeActions(FormatSettings &) const override;
    void describePipeline(FormatSettings & settings) const override;

    const Aggregator::Params & getParams() const { return params; }

    const auto & getGroupingSetsParamsList() const { return grouping_sets_params; }

    bool inOrder() const { return !sort_description_for_merging.empty(); }
    bool explicitSortingRequired() const { return explicit_sorting_required_for_aggregation_in_order; }
    bool isGroupingSets() const { return !grouping_sets_params.empty(); }
    void applyOrder(SortDescription sort_description_for_merging_, SortDescription group_by_sort_description_);
    bool memoryBoundMergingWillBeUsed() const;
    void skipMerging() { skip_merging = true; }

    bool canUseProjection() const;
    /// When we apply an aggregate projection (which is full), this step will only merge data.
    /// The input_stream argument replaces the current single input.
    /// Perhaps we should replace this step with MergingAggregated later? (Right now, aggregation-in-order will not work.)
    void requestOnlyMergeForAggregateProjection(const DataStream & input_stream);
    /// When we apply an aggregate projection (which is partial), this step should be replaced with AggregatingProjectionStep.
    /// The input_stream argument becomes the second input (from the projection).
    std::unique_ptr<AggregatingProjectionStep> convertToAggregatingProjection(const DataStream & input_stream) const;
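
    /// Illustration of the two projection paths described above (an interpretation of the
    /// comments, not additional API):
    ///   * full aggregate projection: the projection already stores aggregated data, so
    ///     requestOnlyMergeForAggregateProjection() makes this step merge-only;
    ///   * partial aggregate projection: part of the data still has to be aggregated normally,
    ///     so the optimizer replaces this step via convertToAggregatingProjection() and the
    ///     resulting AggregatingProjectionStep consumes two inputs (the regular data plus the
    ///     projection data).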

private:
    void updateOutputStream() override;

    Aggregator::Params params;
    GroupingSetsParamsList grouping_sets_params;
    bool final;
    size_t max_block_size;
    size_t aggregation_in_order_max_block_bytes;
    size_t merge_threads;
    size_t temporary_data_merge_threads;
    bool skip_merging = false; // If we aggregate partitioned data, merging is not needed.

    bool storage_has_evenly_distributed_read;
    bool group_by_use_nulls;

    /// Both sort descriptions are needed for the aggregation-in-order optimisation.
    /// Both are subsets of the GROUP BY key columns (or monotonic functions over them).
    /// sort_description_for_merging is a sort description of the input and a prefix of group_by_sort_description.
    /// group_by_sort_description contains all GROUP BY keys and is used for the final merging of aggregated data.
    SortDescription sort_description_for_merging;
    SortDescription group_by_sort_description;
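
    /// Worked example (an assumption for illustration, not part of the upstream header): for
    ///     SELECT a, b, count() FROM t GROUP BY a, b
    /// over a table ordered by (a), aggregation-in-order could use
    ///     sort_description_for_merging = {a}      (the input order we can rely on)
    ///     group_by_sort_description   = {a, b}    (all GROUP BY keys, used for the final merge)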

    /// These settings are used to determine whether we should resize the pipeline to a single stream at the end.
    const bool should_produce_results_in_order_of_bucket_number;
    bool memory_bound_merging_of_aggregation_results_enabled;
    bool explicit_sorting_required_for_aggregation_in_order;

    Processors aggregating_in_order;
    Processors aggregating_sorted;
    Processors finalizing;

    Processors aggregating;
};

class AggregatingProjectionStep : public IQueryPlanStep
{
public:
    AggregatingProjectionStep(
        DataStreams input_streams_,
        Aggregator::Params params_,
        bool final_,
        size_t merge_threads_,
        size_t temporary_data_merge_threads_);

    String getName() const override { return "AggregatingProjection"; }
    QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override;

private:
    Aggregator::Params params;
    bool final;
    size_t merge_threads;
    size_t temporary_data_merge_threads;

    Processors aggregating;
};

}