2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/SquashingTransform.h>
|
2016-07-06 21:48:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
|
|
|
}
|
2016-07-06 21:48:11 +00:00
|
|
|
|
2019-09-26 18:51:17 +00:00
|
|
|
SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_, bool reserve_memory_)
|
|
|
|
: min_block_size_rows(min_block_size_rows_)
|
|
|
|
, min_block_size_bytes(min_block_size_bytes_)
|
|
|
|
, reserve_memory(reserve_memory_)
|
2016-07-06 21:48:11 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-04-17 20:05:35 +00:00
|
|
|
/// Feeds one block into the squasher.
///
/// Returns the columns that are ready to be emitted, or an empty Columns when nothing is ready yet.
/// A block that converts to false marks the end of the input: everything accumulated so far
/// (possibly nothing) is returned and the accumulator is left empty.
Columns SquashingTransform::add(const Block & block)
{
    /// Moves the accumulated columns out to the caller, leaving the accumulator empty.
    auto take_accumulated = [this]
    {
        Columns taken;
        std::swap(taken, accumulated_columns);
        return taken;
    };

    /// End of input stream.
    if (!block)
        return take_accumulated();

    Columns incoming = block.getColumns();

    /// Checked first and exactly once, before the accumulated data is inspected.
    const bool incoming_is_enough = isEnoughSize(incoming);

    /// The new block is big enough by itself and nothing is pending: pass it straight through.
    if (incoming_is_enough && accumulated_columns.empty())
        return incoming;

    /// Either the new block is big enough (so the pending data, however small, must go out first),
    /// or the pending data is already big enough. In both cases emit the pending data
    /// and keep the new block as the new accumulator.
    if (incoming_is_enough || isEnoughSize(accumulated_columns))
    {
        std::swap(incoming, accumulated_columns);
        return incoming;
    }

    /// Neither part is big enough alone: merge them and re-check.
    append(std::move(incoming));

    if (isEnoughSize(accumulated_columns))
        return take_accumulated();

    /// Squashed block is not ready.
    return {};
}
|
|
|
|
|
|
|
|
|
2020-04-17 20:05:35 +00:00
|
|
|
/// Appends the rows of block_columns to the accumulated columns, position by position.
/// If nothing has been accumulated yet, the incoming columns simply become the accumulator.
void SquashingTransform::append(Columns && block_columns)
{
    if (accumulated_columns.empty())
    {
        std::swap(accumulated_columns, block_columns);
        return;
    }

    /// Columns are matched by position, so both sides must have the same number of columns.
    assert(block_columns.size() == accumulated_columns.size());

    size_t position = 0;
    for (const auto & source : block_columns)
    {
        auto & destination = accumulated_columns[position++];

        /// Obtain a mutable version of the accumulated column to insert into.
        auto merged = std::move(*destination).mutate();

        /// NOTE(review): reserve() is given the byte threshold; confirm whether it expects an element count.
        if (reserve_memory)
            merged->reserve(min_block_size_bytes);

        merged->insertRangeFrom(*source, 0, source->size());

        destination = std::move(merged);
    }
}
|
|
|
|
|
|
|
|
|
2020-04-17 20:05:35 +00:00
|
|
|
bool SquashingTransform::isEnoughSize(const Columns & columns)
|
2018-09-08 19:23:48 +00:00
|
|
|
{
|
|
|
|
size_t rows = 0;
|
|
|
|
size_t bytes = 0;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-08 19:23:48 +00:00
|
|
|
for (const auto & column : columns)
|
2017-12-15 01:34:30 +00:00
|
|
|
{
|
2018-09-08 19:23:48 +00:00
|
|
|
if (!rows)
|
|
|
|
rows = column->size();
|
|
|
|
else if (rows != column->size())
|
|
|
|
throw Exception("Sizes of columns doesn't match", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
bytes += column->byteSize();
|
2017-12-15 01:34:30 +00:00
|
|
|
}
|
2018-09-08 19:23:48 +00:00
|
|
|
|
|
|
|
return isEnoughSize(rows, bytes);
|
2016-07-06 21:48:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return (!min_block_size_rows && !min_block_size_bytes)
|
|
|
|
|| (min_block_size_rows && rows >= min_block_size_rows)
|
|
|
|
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
2016-07-06 21:48:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|