2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/ColumnGathererStream.h>
|
2017-01-21 04:24:28 +00:00
|
|
|
#include <common/logger_useful.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2018-06-03 17:43:56 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2016-12-13 16:19:57 +00:00
|
|
|
#include <iomanip>
|
2016-11-03 12:00:44 +00:00
|
|
|
|
2017-01-21 04:24:28 +00:00
|
|
|
|
2016-11-03 12:00:44 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced from this translation unit.
/// They are only declared here; the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int EMPTY_DATA_PASSED;
    extern const int INCOMPATIBLE_COLUMNS;
    extern const int INCORRECT_NUMBER_OF_COLUMNS;
    extern const int LOGICAL_ERROR;
    extern const int NOT_FOUND_COLUMN_IN_BLOCK;
    extern const int RECEIVED_EMPTY_DATA;
}
|
|
|
|
|
2017-07-04 12:38:53 +00:00
|
|
|
/** Creates a stream that gathers the column named `column_name_` from several source streams.
  *
  * All source streams become children of this stream. While constructing, the following is checked:
  *  - at least one source stream is passed, otherwise EMPTY_DATA_PASSED is thrown;
  *  - the header of every source has at most two columns, otherwise INCORRECT_NUMBER_OF_COLUMNS is thrown;
  *  - in every source header, the column object of the gathered column reports the same getName()
  *    as the column created for the first source, otherwise INCOMPATIBLE_COLUMNS is thrown.
  *
  * The header of the first source defines `column`: its name, its type and an empty column of that type.
  *
  * row_sources_buf_ is stored by reference; block_preferred_size_ is stored as is.
  */
ColumnGathererStream::ColumnGathererStream(
    const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_,
    size_t block_preferred_size_)
    : column_name(column_name_)
    , sources(source_streams.size())
    , row_sources_buf(row_sources_buf_)
    , block_preferred_size(block_preferred_size_)
    , log(&Logger::get("ColumnGathererStream"))
{
    if (source_streams.empty())
        throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED);

    children.assign(source_streams.begin(), source_streams.end());

    for (size_t stream_idx = 0, num_streams = children.size(); stream_idx < num_streams; ++stream_idx)
    {
        const Block & header = children[stream_idx]->getHeader();

        /// Sometimes MergeTreeReader injects additional column with partitioning key
        const auto num_header_columns = header.columns();
        if (num_header_columns > 2)
            throw Exception(
                "Block should have 1 or 2 columns, but contains " + toString(num_header_columns),
                ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);

        const auto & gathered_in_header = header.getByName(column_name);

        /// The first source serves as the reference that all other sources are compared against.
        if (stream_idx == 0)
        {
            column.name = column_name;
            column.type = gathered_in_header.type;
            column.column = column.type->createColumn();
            continue;
        }

        if (gathered_in_header.column->getName() != column.column->getName())
            throw Exception("Column types don't match", ErrorCodes::INCOMPATIBLE_COLUMNS);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Produces the next block of the gathered column.
  *
  * Returns:
  *  - the next block of the only child, if there is exactly one child and row_sources_buf is at EOF;
  *  - an empty Block, if there is no source_to_fully_copy and row_sources_buf is at EOF;
  *  - otherwise output_block, whose single column is replaced by the gathered column
  *    when gather() produced a non-empty column.
  */
Block ColumnGathererStream::readImpl()
{
    /// Special case: single source and there are no skipped rows
    const bool has_single_source = (children.size() == 1);
    if (has_single_source && row_sources_buf.eof())
        return children.front()->read();

    /// Nothing is pending and the row sources are exhausted: signal the end of the stream.
    if (!source_to_fully_copy && row_sources_buf.eof())
        return Block();

    MutableColumnPtr gathered_column = column.column->cloneEmpty();

    /// NOTE(review): output_block is assigned before gather() is called, as in the original code;
    /// presumably gather() may access it through this stream - confirm before reordering.
    output_block = Block{column.cloneEmpty()};

    gathered_column->gather(*this);

    const bool has_gathered_rows = !gathered_column->empty();
    if (has_gathered_rows)
        output_block.getByPosition(0).column = std::move(gathered_column);

    return output_block;
}
|
|
|
|
|
2016-12-13 16:19:57 +00:00
|
|
|
|
2016-12-20 14:58:23 +00:00
|
|
|
/** Reads the next block from the child stream number `source_num` into `source`
  * and refreshes `source` for the gathered column via source.update(column_name).
  *
  * An Exception thrown while reading or updating is rethrown with the stream name and part number appended.
  * Throws RECEIVED_EMPTY_DATA if source.size is zero afterwards.
  */
void ColumnGathererStream::fetchNewBlock(Source & source, size_t source_num)
{
    const auto & stream = children[source_num];

    try
    {
        source.block = stream->read();
        source.update(column_name);
    }
    catch (Exception & e)
    {
        /// Add context about which source failed, then let the same exception propagate.
        e.addMessage("Cannot fetch required block. Stream " + stream->getName() + ", part " + toString(source_num));
        throw;
    }

    if (source.size != 0)
        return;

    throw Exception("Fetched block is empty. Stream " + stream->getName() + ", part " + toString(source_num),
        ErrorCodes::RECEIVED_EMPTY_DATA);
}
|
|
|
|
|
|
|
|
|
2016-12-13 16:19:57 +00:00
|
|
|
void ColumnGathererStream::readSuffixImpl()
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const BlockStreamProfileInfo & profile_info = getProfileInfo();
|
2017-09-14 13:56:54 +00:00
|
|
|
|
|
|
|
/// Don't print info for small parts (< 10M rows)
|
|
|
|
if (profile_info.rows < 10000000)
|
|
|
|
return;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
double seconds = profile_info.total_stopwatch.elapsedSeconds();
|
2018-09-10 18:23:11 +00:00
|
|
|
|
|
|
|
std::stringstream message;
|
|
|
|
message << std::fixed << std::setprecision(2)
|
2018-06-03 17:43:56 +00:00
|
|
|
<< "Gathered column " << column_name
|
2017-04-01 07:20:54 +00:00
|
|
|
<< " (" << static_cast<double>(profile_info.bytes) / profile_info.rows << " bytes/elem.)"
|
2018-09-10 18:23:11 +00:00
|
|
|
<< " in " << seconds << " sec.";
|
|
|
|
|
|
|
|
if (seconds)
|
|
|
|
message << ", " << profile_info.rows / seconds << " rows/sec., "
|
|
|
|
<< profile_info.bytes / 1048576.0 / seconds << " MiB/sec.";
|
|
|
|
|
|
|
|
LOG_TRACE(log, message.str());
|
2016-12-13 16:19:57 +00:00
|
|
|
}
|
|
|
|
|
2016-11-03 12:00:44 +00:00
|
|
|
}
|