ClickHouse/src/DataStreams/ColumnGathererStream.cpp

115 lines
3.8 KiB
C++
Raw Normal View History

#include <DataStreams/ColumnGathererStream.h>
#include <common/logger_useful.h>
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
2020-05-23 18:22:40 +00:00
#include <Common/formatReadable.h>
#include <IO/WriteHelpers.h>
#include <iomanip>
namespace DB
{
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int EMPTY_DATA_PASSED;
    extern const int INCOMPATIBLE_COLUMNS;
    extern const int INCORRECT_NUMBER_OF_COLUMNS;
    extern const int RECEIVED_EMPTY_DATA;
}
/// Creates a stream that gathers the column `column_name_` from `source_streams`.
///
/// All source streams are registered as children of this stream. The header of every
/// child is validated, and the header of the first child determines the type of the
/// gathered column (an empty column of that type is created as the prototype).
///
/// Throws:
///  - EMPTY_DATA_PASSED if `source_streams` is empty;
///  - INCORRECT_NUMBER_OF_COLUMNS if some header contains more than two columns;
///  - INCOMPATIBLE_COLUMNS if the column of some later source reports a different
///    `getName()` than the prototype column taken from the first source.
ColumnGathererStream::ColumnGathererStream(
    const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_,
    size_t block_preferred_size_)
    : column_name(column_name_)
    , sources(source_streams.size())
    , row_sources_buf(row_sources_buf_)
    , block_preferred_size(block_preferred_size_)
    , log(&Poco::Logger::get("ColumnGathererStream"))
{
    if (source_streams.empty())
        throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED);

    children.assign(source_streams.begin(), source_streams.end());

    bool is_first_source = true;
    for (const auto & child : children)
    {
        const Block & header = child->getHeader();

        /// Sometimes MergeTreeReader injects additional column with partitioning key
        const size_t num_columns = header.columns();
        if (num_columns > 2)
            throw Exception(
                "Block should have 1 or 2 columns, but contains " + toString(num_columns),
                ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);

        if (is_first_source)
        {
            /// The first source defines the name, type and empty prototype of the gathered column.
            column.name = column_name;
            column.type = header.getByName(column_name).type;
            column.column = column.type->createColumn();

            is_first_source = false;
            continue;
        }

        /// Every other source must provide the same kind of column as the first one.
        if (header.getByName(column_name).column->getName() != column.column->getName())
            throw Exception("Column types don't match", ErrorCodes::INCOMPATIBLE_COLUMNS);
    }
}
/// Produces the next block of the gathered column.
///
/// Returns an empty Block when `source_to_fully_copy` is not set and the row sources
/// buffer is at EOF. With exactly one child and the row sources buffer at EOF, the
/// result of reading that child is returned as is.
Block ColumnGathererStream::readImpl()
{
    /// Special case: single source and there are no skipped rows
    if (children.size() == 1 && row_sources_buf.eof())
        return children.front()->read();

    if (!source_to_fully_copy && row_sources_buf.eof())
        return {};

    MutableColumnPtr gathered = column.column->cloneEmpty();

    /// output_block must be prepared before gathering, see below.
    output_block = Block{column.cloneEmpty()};

    /// Surprisingly this call may directly change output_block, bypassing
    /// the local column. See ColumnGathererStream::gather.
    gathered->gather(*this);

    /// Nothing was written into the local column: return output_block exactly as
    /// the gather call left it.
    if (gathered->empty())
        return output_block;

    output_block.getByPosition(0).column = std::move(gathered);
    return output_block;
}
/// Reads the next block from child stream number `source_num` into `source.block`
/// and then calls `source.update(column_name)`.
///
/// If either step throws an Exception, a message naming the stream and the part
/// number is added to it and it is rethrown. Throws RECEIVED_EMPTY_DATA if
/// `source.size` is zero after the update.
void ColumnGathererStream::fetchNewBlock(Source & source, size_t source_num)
{
    /// Builds "<prefix><stream name>, part <source_num>". Invoked on error paths only,
    /// so the stream name is not requested when everything goes well.
    const auto with_stream_info = [&](const char * prefix)
    {
        return prefix + children[source_num]->getName() + ", part " + toString(source_num);
    };

    try
    {
        source.block = children[source_num]->read();
        source.update(column_name);
    }
    catch (Exception & e)
    {
        e.addMessage(with_stream_info("Cannot fetch required block. Stream "));
        throw;
    }

    if (source.size == 0)
        throw Exception(with_stream_info("Fetched block is empty. Stream "), ErrorCodes::RECEIVED_EMPTY_DATA);
}
void ColumnGathererStream::readSuffixImpl()
{
const BlockStreamProfileInfo & profile_info = getProfileInfo();
/// Don't print info for small parts (< 10M rows)
if (profile_info.rows < 10000000)
return;
double seconds = profile_info.total_stopwatch.elapsedSeconds();
2020-05-23 18:22:40 +00:00
if (!seconds)
2020-05-23 22:24:01 +00:00
LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.",
2020-05-23 18:22:40 +00:00
column_name, static_cast<double>(profile_info.bytes) / profile_info.rows);
else
2020-05-23 22:24:01 +00:00
LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.",
2020-05-23 18:22:40 +00:00
column_name, static_cast<double>(profile_info.bytes) / profile_info.rows, seconds,
2020-05-30 21:35:52 +00:00
profile_info.rows / seconds, ReadableSize(profile_info.bytes / seconds));
}
}