2013-10-01 18:09:31 +00:00
|
|
|
#pragma once
|
|
|
|
|
2019-12-22 00:19:07 +00:00
|
|
|
#include <queue>
#include <utility>

#include <Core/Row.h>
#include <Core/ColumnNumbers.h>
#include <Common/AlignedBuffer.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
|
|
|
2013-10-01 18:09:31 +00:00
|
|
|
|
2020-03-20 03:32:47 +00:00
|
|
|
namespace Poco { class Logger; }
|
|
|
|
|
2013-10-01 18:09:31 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** Merges several sorted streams into one.
|
|
|
|
* For each group of consecutive identical values of the primary key (the columns by which the data is sorted),
|
|
|
|
* collapses them into one row, summing all the numeric columns except the primary key.
|
|
|
|
* If in all numeric columns, except for the primary key, the result is zero, it deletes the row.
|
2013-10-01 18:09:31 +00:00
|
|
|
*/
|
|
|
|
class SummingSortedBlockInputStream : public MergingSortedBlockInputStream
|
|
|
|
{
|
|
|
|
public:
|
2018-04-07 01:46:50 +00:00
|
|
|
SummingSortedBlockInputStream(
|
|
|
|
const BlockInputStreams & inputs_,
|
2017-04-01 07:20:54 +00:00
|
|
|
const SortDescription & description_,
|
2017-05-13 22:19:04 +00:00
|
|
|
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
|
2017-04-01 07:20:54 +00:00
|
|
|
const Names & column_names_to_sum_,
|
2019-02-19 01:47:27 +00:00
|
|
|
size_t max_block_size_);
|
2013-10-01 18:09:31 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
String getName() const override { return "SummingSorted"; }
|
2013-10-01 18:09:31 +00:00
|
|
|
|
2020-03-18 00:57:00 +00:00
|
|
|
/// Stores numbers of key-columns and value-columns.
|
|
|
|
struct MapDescription
|
|
|
|
{
|
|
|
|
std::vector<size_t> key_col_nums;
|
|
|
|
std::vector<size_t> val_col_nums;
|
|
|
|
};
|
|
|
|
|
2013-10-01 18:09:31 +00:00
|
|
|
protected:
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Can return 1 more records than max_block_size.
|
2017-04-01 07:20:54 +00:00
|
|
|
Block readImpl() override;
|
2013-10-01 18:09:31 +00:00
|
|
|
|
|
|
|
private:
|
2020-03-20 03:32:47 +00:00
|
|
|
Poco::Logger * log;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Read up to the end.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool finished = false;
|
|
|
|
|
2017-11-16 20:37:45 +00:00
|
|
|
/// Columns with which values should be summed.
|
2017-10-09 05:56:22 +00:00
|
|
|
ColumnNumbers column_numbers_not_to_aggregate;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** A table can have nested tables that are treated in a special way.
|
|
|
|
* If the name of the nested table ends in `Map` and it contains at least two columns,
|
|
|
|
* satisfying the following criteria:
|
|
|
|
* - the first column, as well as all columns whose names end with `ID`, `Key` or `Type` - numeric ((U)IntN, Date, DateTime);
|
|
|
|
* (a tuple of such columns will be called `keys`)
|
|
|
|
* - the remaining columns are arithmetic ((U)IntN, Float32/64), called (`values`...).
|
|
|
|
* This nested table is treated as a mapping (keys...) => (values...) and when merge
|
|
|
|
* its rows, the merge of the elements of two sets by (keys...) with summing of corresponding (values...).
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-05-13 22:19:04 +00:00
|
|
|
* Example:
|
2017-04-01 07:20:54 +00:00
|
|
|
* [(1, 100)] + [(2, 150)] -> [(1, 100), (2, 150)]
|
|
|
|
* [(1, 100)] + [(1, 150)] -> [(1, 250)]
|
|
|
|
* [(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)]
|
|
|
|
* [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
|
|
|
|
*
|
2017-05-13 22:19:04 +00:00
|
|
|
* This very unusual functionality is made exclusively for the banner system,
|
|
|
|
* is not supposed for use by anyone else,
|
|
|
|
* and can be deleted at any time.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
2017-10-12 21:54:49 +00:00
|
|
|
/// Stores aggregation function, state, and columns to be used as function arguments
|
2017-10-09 05:56:22 +00:00
|
|
|
struct AggregateDescription
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-01-25 19:36:18 +00:00
|
|
|
/// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing.
|
2017-10-09 05:56:22 +00:00
|
|
|
AggregateFunctionPtr function;
|
2017-11-16 19:03:32 +00:00
|
|
|
IAggregateFunction::AddFunc add_function = nullptr;
|
2017-10-09 05:56:22 +00:00
|
|
|
std::vector<size_t> column_numbers;
|
2017-12-15 00:01:59 +00:00
|
|
|
MutableColumnPtr merged_column;
|
2018-09-01 04:25:03 +00:00
|
|
|
AlignedBuffer state;
|
2017-10-09 05:56:22 +00:00
|
|
|
bool created = false;
|
2018-07-04 20:46:17 +00:00
|
|
|
|
|
|
|
/// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above.
|
|
|
|
bool is_agg_func_type = false;
|
2017-10-12 21:54:49 +00:00
|
|
|
|
2017-11-28 02:14:50 +00:00
|
|
|
void init(const char * function_name, const DataTypes & argument_types)
|
|
|
|
{
|
|
|
|
function = AggregateFunctionFactory::instance().get(function_name, argument_types);
|
2019-11-11 08:36:19 +00:00
|
|
|
add_function = function->getAddressOfAddFunction();
|
2018-09-01 04:25:03 +00:00
|
|
|
state.reset(function->sizeOfData(), function->alignOfData());
|
2017-11-28 02:14:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void createState()
|
|
|
|
{
|
|
|
|
if (created)
|
|
|
|
return;
|
2018-07-04 20:46:17 +00:00
|
|
|
if (is_agg_func_type)
|
2018-06-26 10:28:42 +00:00
|
|
|
merged_column->insertDefault();
|
|
|
|
else
|
|
|
|
function->create(state.data());
|
2017-11-28 02:14:50 +00:00
|
|
|
created = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void destroyState()
|
|
|
|
{
|
|
|
|
if (!created)
|
|
|
|
return;
|
2018-07-04 20:46:17 +00:00
|
|
|
if (!is_agg_func_type)
|
2018-06-26 10:28:42 +00:00
|
|
|
function->destroy(state.data());
|
2017-11-28 02:14:50 +00:00
|
|
|
created = false;
|
|
|
|
}
|
|
|
|
|
2017-10-12 21:54:49 +00:00
|
|
|
/// Explicitly destroy aggregation state if the stream is terminated
|
|
|
|
~AggregateDescription()
|
|
|
|
{
|
2017-11-28 02:14:50 +00:00
|
|
|
destroyState();
|
2017-10-12 21:54:49 +00:00
|
|
|
}
|
2017-11-28 02:14:50 +00:00
|
|
|
|
|
|
|
AggregateDescription() = default;
|
|
|
|
AggregateDescription(AggregateDescription &&) = default;
|
|
|
|
AggregateDescription(const AggregateDescription &) = delete;
|
2017-04-01 07:20:54 +00:00
|
|
|
};
|
|
|
|
|
2017-10-09 05:56:22 +00:00
|
|
|
std::vector<AggregateDescription> columns_to_aggregate;
|
2017-10-12 22:09:06 +00:00
|
|
|
std::vector<MapDescription> maps_to_sum;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-04-19 13:38:25 +00:00
|
|
|
SharedBlockRowRef current_key; /// The current primary key.
|
|
|
|
SharedBlockRowRef next_key; /// The primary key of the next row.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
Row current_row;
|
2017-11-16 19:03:32 +00:00
|
|
|
bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-11-16 19:03:32 +00:00
|
|
|
size_t merged_rows = 0; /// Number of rows merged into current result block
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** We support two different cursors - with Collation and without.
|
|
|
|
* Templates are used instead of polymorphic SortCursor and calls to virtual functions.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2019-12-22 00:19:07 +00:00
|
|
|
void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-11-16 19:03:32 +00:00
|
|
|
/// Insert the summed row for the current group into the result and updates some of per-block flags if the row is not "zero".
|
2018-08-03 16:27:27 +00:00
|
|
|
void insertCurrentRowIfNeeded(MutableColumns & merged_columns);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-11-16 19:03:32 +00:00
|
|
|
// Add the row under the cursor to the `row`.
|
2017-12-15 01:34:30 +00:00
|
|
|
void addRow(SortCursor & cursor);
|
2013-10-01 18:09:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|