2019-03-25 15:21:09 +00:00
|
|
|
#include <Processors/Transforms/TotalsHavingTransform.h>
|
|
|
|
#include <Processors/Transforms/AggregatingTransform.h>
|
|
|
|
|
|
|
|
#include <Columns/ColumnAggregateFunction.h>
|
|
|
|
#include <Columns/FilterDescription.h>
|
|
|
|
|
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
#include <DataStreams/finalizeBlock.h>
|
|
|
|
#include <Interpreters/ExpressionActions.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
/// Error codes raised from this file. They are only declared here (`extern`);
/// the definitions live outside of this translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
}
|
2019-03-25 15:21:09 +00:00
|
|
|
|
2019-04-08 13:02:19 +00:00
|
|
|
/// Replaces every ColumnAggregateFunction column of `chunk` with the result of
/// ColumnAggregateFunction::convertToValues. All other columns are put back unchanged.
void finalizeChunk(Chunk & chunk)
{
    /// The row count is read before the columns are detached and reused when they are put back.
    const auto rows_count = chunk.getNumRows();
    auto detached = chunk.detachColumns();

    for (auto & col : detached)
    {
        const bool holds_states = typeid_cast<const ColumnAggregateFunction *>(col.get()) != nullptr;
        if (!holds_states)
            continue;

        col = ColumnAggregateFunction::convertToValues(IColumn::mutate(std::move(col)));
    }

    chunk.setColumns(std::move(detached), rows_count);
}
|
|
|
|
|
2020-11-11 17:17:26 +00:00
|
|
|
/// Builds the output header from the input header `block`:
/// the block is finalized when `final` is set, then `expression` (if any) is executed over it.
Block TotalsHavingTransform::transformHeader(Block block, const ExpressionActionsPtr & expression, bool final)
{
    if (final)
    {
        finalizeBlock(block);
    }

    /// Nothing else to apply without an expression.
    if (!expression)
        return block;

    size_t rows_in_header = block.rows();
    expression->execute(block, rows_in_header);

    return block;
}
|
|
|
|
|
|
|
|
/// Sets up the transform:
///  - remembers the position of the filter column in the main output header (when a filter column is given);
///  - stores the finalized version of the input header;
///  - appends one more output port, used for totals;
///  - fills `current_totals` with one default row per column of `header`.
TotalsHavingTransform::TotalsHavingTransform(
    const Block & header,
    bool overflow_row_,
    const ExpressionActionsPtr & expression_,
    const std::string & filter_column_,
    TotalsMode totals_mode_,
    double auto_include_threshold_,
    bool final_)
    : ISimpleTransform(header, transformHeader(header, expression_, final_), true)
    , overflow_row(overflow_row_)
    , expression(expression_)
    , filter_column_name(filter_column_)
    , totals_mode(totals_mode_)
    , auto_include_threshold(auto_include_threshold_)
    , final(final_)
{
    const bool has_filter_column = !filter_column_name.empty();
    if (has_filter_column)
    {
        filter_column_pos = outputs.front().getHeader().getPositionByName(filter_column_name);
    }

    finalized_header = getInputPort().getHeader();
    finalizeBlock(finalized_header);

    /// Port for Totals. Its header is the finalized header, passed through the expression if there is one.
    Block totals_port_header = finalized_header;
    if (expression)
    {
        size_t header_rows = totals_port_header.rows();
        expression->execute(totals_port_header, header_rows);
    }
    outputs.emplace_back(totals_port_header, this);

    /// Initial state of the totals: a single default value in each column.
    current_totals.reserve(header.columns());
    for (const auto & header_column : header)
    {
        MutableColumnPtr initial = header_column.type->createColumn();
        header_column.type->insertDefaultInto(*initial);
        current_totals.emplace_back(std::move(initial));
    }
}
|
|
|
|
|
|
|
|
/// Delegates to ISimpleTransform::prepare() until it reports Finished,
/// then drives the totals port: the totals chunk is pushed once and the port is finished.
IProcessor::Status TotalsHavingTransform::prepare()
{
    if (!finished_transform)
    {
        const auto base_status = ISimpleTransform::prepare();
        if (base_status != Status::Finished)
            return base_status;

        /// From now on only the totals port is handled.
        finished_transform = true;
    }

    auto & totals_port = getTotalsPort();

    /// Can we output anything at all?
    if (totals_port.isFinished())
    {
        return Status::Finished;
    }

    if (!totals_port.canPush())
    {
        return Status::PortFull;
    }

    /// Totals are not built yet: ask for work() to be called.
    if (!totals)
    {
        return Status::Ready;
    }

    totals_port.push(std::move(totals));
    totals_port.finish();

    return Status::Finished;
}
|
|
|
|
|
|
|
|
void TotalsHavingTransform::work()
|
|
|
|
{
|
|
|
|
if (finished_transform)
|
|
|
|
prepareTotals();
|
|
|
|
else
|
|
|
|
ISimpleTransform::work();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Processes one chunk of aggregated data.
///
/// - When `overflow_row` is set, the chunk must carry AggregatedChunkInfo; a chunk flagged
///   `is_overflows` is moved into `overflow_aggregates` and the function returns.
/// - Otherwise the chunk is accumulated into the running totals (subject to `totals_mode` and
///   the HAVING filter), and `chunk` is replaced by the rows which passed the filter.
/// - `total_keys` is increased by the number of incoming rows, `passed_keys` by the number of
///   rows left in `chunk`.
///
/// Throws LOGICAL_ERROR if `overflow_row` is set and the chunk info is missing or is not
/// AggregatedChunkInfo. Throws ILLEGAL_COLUMN if the HAVING expression contains an ARRAY_JOIN action.
void TotalsHavingTransform::transform(Chunk & chunk)
{
    /// Block with values not included in `max_rows_to_group_by`. We'll postpone it.
    if (overflow_row)
    {
        const auto & info = chunk.getChunkInfo();
        if (!info)
            throw Exception("Chunk info was not set for chunk in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR);

        const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
        if (!agg_info)
            throw Exception("Chunk should have AggregatedChunkInfo in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR);

        if (agg_info->is_overflows)
        {
            overflow_aggregates = std::move(chunk);
            return;
        }
    }

    if (!chunk)
        return;

    /// `chunk` keeps the original columns (they are what gets added to totals),
    /// `finalized` is the copy which goes through finalization / HAVING.
    auto finalized = chunk.clone();
    if (final)
        finalizeChunk(finalized);

    total_keys += finalized.getNumRows();

    if (filter_column_name.empty())
    {
        /// No HAVING: every row is added to totals and passed further.
        addToTotals(chunk, nullptr);
        chunk = std::move(finalized);
    }
    else
    {
        /// Compute the expression in HAVING.
        const auto & cur_header = final ? finalized_header : getInputPort().getHeader();
        size_t num_rows = finalized.getNumRows();
        auto finalized_block = cur_header.cloneWithColumns(finalized.detachColumns());

        for (const auto & action : expression->getActions())
        {
            if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
                throw Exception("Having clause cannot contain arrayJoin", ErrorCodes::ILLEGAL_COLUMN);
        }

        expression->execute(finalized_block, num_rows);
        auto columns = finalized_block.getColumns();

        ColumnPtr filter_column_ptr = columns[filter_column_pos];
        ConstantFilterDescription const_filter_description(*filter_column_ptr);

        if (const_filter_description.always_true)
        {
            addToTotals(chunk, nullptr);
            chunk.setColumns(std::move(columns), num_rows);

            /// All rows passed HAVING, so they must be counted as passed keys,
            /// exactly like on the paths which fall through to the end of the function.
            /// Otherwise the passed/total ratio used for AFTER_HAVING_AUTO is underestimated.
            passed_keys += chunk.getNumRows();
            return;
        }

        if (const_filter_description.always_false)
        {
            /// Nothing passed HAVING; only BEFORE_HAVING totals still take these rows into account.
            if (totals_mode == TotalsMode::BEFORE_HAVING)
                addToTotals(chunk, nullptr);

            chunk.clear();
            return;
        }

        FilterDescription filter_description(*filter_column_ptr);

        /// Add values to `totals` (if it was not already done).
        if (totals_mode == TotalsMode::BEFORE_HAVING)
            addToTotals(chunk, nullptr);
        else
            addToTotals(chunk, filter_description.data);

        /// Filter the block by expression in HAVING.
        for (auto & column : columns)
        {
            column = column->filter(*filter_description.data, -1);
            if (column->empty())
            {
                /// No rows left after filtering: nothing to pass further.
                chunk.clear();
                return;
            }
        }

        num_rows = columns.front()->size();
        chunk.setColumns(std::move(columns), num_rows);
    }

    passed_keys += chunk.getNumRows();
}
|
|
|
|
|
|
|
|
/// Merges aggregate states of `chunk` into `current_totals`, column by column.
/// Only ColumnAggregateFunction columns are touched. When `filter` is not null,
/// only the rows with a non-zero filter value are merged.
/// Throws LOGICAL_ERROR if `filter` is given and its size differs from the column size.
void TotalsHavingTransform::addToTotals(const Chunk & chunk, const IColumn::Filter * filter)
{
    const auto & source_columns = chunk.getColumns();
    const auto columns_count = chunk.getNumColumns();

    for (size_t pos = 0; pos < columns_count; ++pos)
    {
        const auto * states_column = typeid_cast<const ColumnAggregateFunction *>(source_columns[pos].get());
        if (!states_column)
            continue;

        auto & totals_states = typeid_cast<ColumnAggregateFunction &>(*current_totals[pos]);
        assert(totals_states.size() == 1);

        /// All (selected) states of the source column are merged
        /// into the corresponding totals column.
        const ColumnAggregateFunction::Container & states = states_column->getData();
        const size_t states_count = states.size();

        if (filter && filter->size() != states_count)
            throw Exception("Filter has size which differs from column size", ErrorCodes::LOGICAL_ERROR);

        for (size_t row = 0; row < states_count; ++row)
        {
            /// Without a filter every row is taken.
            if (filter && !(*filter)[row])
                continue;

            totals_states.insertMergeFrom(states[row]);
        }
    }
}
|
|
|
|
|
|
|
|
void TotalsHavingTransform::prepareTotals()
|
|
|
|
{
|
|
|
|
/// If totals_mode == AFTER_HAVING_AUTO, you need to decide whether to add aggregates to TOTALS for strings,
|
|
|
|
/// not passed max_rows_to_group_by.
|
|
|
|
if (overflow_aggregates)
|
|
|
|
{
|
|
|
|
if (totals_mode == TotalsMode::BEFORE_HAVING
|
|
|
|
|| totals_mode == TotalsMode::AFTER_HAVING_INCLUSIVE
|
|
|
|
|| (totals_mode == TotalsMode::AFTER_HAVING_AUTO
|
|
|
|
&& static_cast<double>(passed_keys) / total_keys >= auto_include_threshold))
|
|
|
|
addToTotals(overflow_aggregates, nullptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
totals = Chunk(std::move(current_totals), 1);
|
|
|
|
finalizeChunk(totals);
|
2019-04-26 15:12:39 +00:00
|
|
|
|
2019-04-30 17:37:30 +00:00
|
|
|
if (expression)
|
|
|
|
{
|
2020-11-03 11:28:28 +00:00
|
|
|
size_t num_rows = totals.getNumRows();
|
2019-04-30 17:37:30 +00:00
|
|
|
auto block = finalized_header.cloneWithColumns(totals.detachColumns());
|
2020-11-03 11:28:28 +00:00
|
|
|
expression->execute(block, num_rows);
|
2020-09-04 12:34:36 +00:00
|
|
|
/// Note: after expression totals may have several rows if `arrayJoin` was used in expression.
|
2020-11-03 11:28:28 +00:00
|
|
|
totals = Chunk(block.getColumns(), num_rows);
|
2019-04-30 17:37:30 +00:00
|
|
|
}
|
2019-03-25 15:21:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|