ClickHouse/src/Interpreters/SquashingTransform.cpp
Azat Khuzhin 6d4af3bac1 Move SquashingTransform to Interpreters (to fix split build)
clickhouse_common_io requires clickhouse_core:

    ld.lld: error: undefined symbol: DB::blocksHaveEqualStructure(DB::Block const&, DB::Block const&)
    >>> referenced by SquashingTransform.cpp:92 (/src/ch/clickhouse/src/Common/SquashingTransform.cpp:92)
    >>>               src/CMakeFiles/clickhouse_common_io.dir/Common/SquashingTransform.cpp.o:(void DB::SquashingTransform::append<DB::Block&&>(DB::Block&&))
    >>> referenced by SquashingTransform.cpp:92 (/src/ch/clickhouse/src/Common/SquashingTransform.cpp:92)
    >>>               src/CMakeFiles/clickhouse_common_io.dir/Common/SquashingTransform.cpp.o:(void DB::SquashingTransform::append<DB::Block const&>(DB::Block const&))

while clickhouse_core requires clickhouse_common_io:

    "clickhouse_core" of type SHARED_LIBRARY
      depends on "roaring" (weak)
      depends on "clickhouse_common_io" (weak)
      depends on "clickhouse_common_config" (weak)
      depends on "clickhouse_common_zookeeper" (weak)
      depends on "clickhouse_dictionaries_embedded" (weak)
      depends on "clickhouse_parsers" (weak)

Follow-up for: #30247 (cc @KochetovNicolai)
2021-10-18 10:28:36 +03:00

137 lines
3.6 KiB
C++

#include <Interpreters/SquashingTransform.h>
namespace DB
{
/// Error codes used by this translation unit. They are only declared here (extern);
/// the definitions are provided elsewhere.
namespace ErrorCodes
{
/// Code attached to the Exception raised by isEnoughSize(const Block &)
/// when the columns of a block are found to have different sizes.
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_, bool reserve_memory_)
: min_block_size_rows(min_block_size_rows_)
, min_block_size_bytes(min_block_size_bytes_)
, reserve_memory(reserve_memory_)
{
}
/// Rvalue overload: forwards the block to addImpl as `Block &&`, so addImpl may
/// move from it instead of copying. Returns whatever addImpl returns.
Block SquashingTransform::add(Block && input_block)
{
return addImpl<Block &&>(std::move(input_block));
}
/// Const-reference overload: forwards the block to addImpl as `const Block &`;
/// the caller's block is left untouched. Returns whatever addImpl returns.
Block SquashingTransform::add(const Block & input_block)
{
return addImpl<const Block &>(input_block);
}
/** Shared implementation of both add() overloads.
  *
  * ReferenceType is either `Block &&` or `const Block &`. Receiving the argument
  * through the caller's exact reference type lets the rvalue overload hand its
  * block over by move, while the const overload copies only at the points where
  * the block is actually stored or returned.
  *
  * Returns a block when one is ready to be emitted, otherwise an empty Block.
  * An empty input block flushes whatever has been accumulated so far.
  */
template <typename ReferenceType>
Block SquashingTransform::addImpl(ReferenceType input_block)
{
    /// Empty input marks the end of the stream: hand out the pending data (possibly none).
    if (!input_block)
    {
        Block flushed;
        std::swap(flushed, accumulated_block);
        return flushed;
    }

    const bool input_is_enough = isEnoughSize(input_block);

    /// Nothing is pending and the new block is big enough by itself: pass it straight through.
    if (input_is_enough && !accumulated_block)
        return std::move(input_block);

    /// Either the new block or the pending data is already big enough: emit the pending
    /// data (it may be small in the first case) and keep the new block as the pending one.
    /// The pending data is only measured when the new block was not big enough.
    if (input_is_enough || isEnoughSize(accumulated_block))
    {
        Block previous = std::move(input_block);
        std::swap(previous, accumulated_block);
        return previous;
    }

    /// Both are small: merge the new block into the pending data.
    append<ReferenceType>(std::move(input_block));

    /// Squashed block is not ready yet.
    if (!isEnoughSize(accumulated_block))
        return {};

    Block squashed;
    std::swap(squashed, accumulated_block);
    return squashed;
}
/** Adds the rows of input_block to accumulated_block, column by column.
  *
  * When nothing has been accumulated yet, input_block simply becomes the
  * accumulated block (moved or copied depending on ReferenceType).
  * Both blocks are expected to have the same structure; this is only asserted.
  */
template <typename ReferenceType>
void SquashingTransform::append(ReferenceType input_block)
{
    if (!accumulated_block)
    {
        accumulated_block = std::move(input_block);
        return;
    }

    assert(blocksHaveEqualStructure(input_block, accumulated_block));

    const size_t columns_count = accumulated_block.columns();
    for (size_t position = 0; position != columns_count; ++position)
    {
        const auto incoming = input_block.getByPosition(position).column;
        auto & target = accumulated_block.getByPosition(position).column;

        /// Take the accumulated column out of the block to obtain a mutable version of it.
        auto writable = IColumn::mutate(std::move(target));

        /// NOTE(review): reserve() receives the byte threshold here; confirm whether
        /// IColumn::reserve expects a number of elements rather than bytes.
        if (reserve_memory)
            writable->reserve(min_block_size_bytes);

        writable->insertRangeFrom(*incoming, 0, incoming->size());

        /// Put the extended column back into its slot.
        target = std::move(writable);
    }
}
/** Tells whether `block` on its own satisfies the squashing thresholds.
  *
  * Takes the row count shared by all columns and the sum of their byteSize(),
  * then delegates to isEnoughSize(rows, bytes). A block without columns is
  * measured as 0 rows / 0 bytes.
  *
  * Throws Exception with code SIZES_OF_COLUMNS_DOESNT_MATCH if any column's size
  * differs from the size of the first column.
  */
bool SquashingTransform::isEnoughSize(const Block & block)
{
    size_t rows = 0;
    size_t bytes = 0;

    /// An explicit flag is used instead of treating `rows == 0` as "not set yet":
    /// otherwise a block whose first column is empty and a later one is not would
    /// slip through the consistency check.
    bool rows_known = false;

    for (const auto & [column, type, name] : block)
    {
        const size_t column_rows = column->size();

        if (!rows_known)
        {
            rows = column_rows;
            rows_known = true;
        }
        else if (rows != column_rows)
            throw Exception("Sizes of columns doesn't match", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

        bytes += column->byteSize();
    }

    return isEnoughSize(rows, bytes);
}
/** Checks the given row and byte counts against the configured thresholds.
  *
  * The two thresholds are OR-ed; a threshold of zero is ignored.
  * When both thresholds are zero, any size counts as enough.
  */
bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const
{
    /// No thresholds configured: every block is big enough.
    if (!min_block_size_rows && !min_block_size_bytes)
        return true;

    /// Row threshold is active and reached.
    if (min_block_size_rows && rows >= min_block_size_rows)
        return true;

    /// Otherwise only an active, reached byte threshold can make it enough.
    return min_block_size_bytes && bytes >= min_block_size_bytes;
}
}