ClickHouse/dbms/src/DataStreams/AggregatingBlockInputStream.cpp

80 lines
2.3 KiB
C++
Raw Normal View History

2017-03-14 14:32:48 +00:00
#include <DB/Common/ClickHouseRevision.h>
#include <DB/DataStreams/BlocksListBlockInputStream.h>
#include <DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
2011-09-19 03:34:23 +00:00
#include <DB/DataStreams/AggregatingBlockInputStream.h>
#include <DB/DataStreams/NativeBlockInputStream.h>
2011-09-19 03:34:23 +00:00
namespace ProfileEvents
{
/// Declared here and defined in another translation unit.
/// In this file it is incremented by AggregatingBlockInputStream::readImpl()
/// when the aggregator reports that it has temporary files to merge.
extern const Event ExternalAggregationMerge;
}
2011-09-19 03:34:23 +00:00
namespace DB
{
/** Returns the next block of aggregated data.
  *
  * On the first call the whole child stream is aggregated and `impl` is set up
  * to produce the result: either directly from memory, or by merging the
  * temporary files the aggregator wrote to disk. Every call (including the
  * first) then reads one block from `impl`.
  *
  * Returns an empty Block if the stream has been cancelled or if `impl` was never set.
  */
Block AggregatingBlockInputStream::readImpl()
{
    if (!executed)
    {
        executed = true;

        AggregatedDataVariantsPtr data_variants = std::make_shared<AggregatedDataVariants>();

        /// Give the aggregator a way to ask whether this stream has been cancelled.
        Aggregator::CancellationHook cancellation_check = [this]() { return this->isCancelled(); };
        aggregator.setCancellationHook(cancellation_check);

        aggregator.execute(children.back(), *data_variants);

        if (aggregator.hasTemporaryFiles())
        {
            /** Partially-aggregated data was written to temporary files on disk:
              * read the files back and merge them, spending the minimum amount of memory.
              */
            ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge);

            if (!isCancelled())
            {
                /// Whatever is still in RAM is flushed to disk too, so there is a single merge path.
                size_t rows_in_memory = data_variants->sizeWithoutOverflowRow();
                if (rows_in_memory)
                    aggregator.writeToTemporaryFile(*data_variants, rows_in_memory);
            }

            const auto & temporary_files = aggregator.getTemporaryFiles();

            /// `temporary_inputs` owns the per-file readers; `sources` only collects their block streams.
            BlockInputStreams sources;
            for (const auto & temporary_file : temporary_files.files)
            {
                temporary_inputs.emplace_back(std::make_unique<TemporaryFileStream>(temporary_file->path()));
                sources.emplace_back(temporary_inputs.back()->block_in);
            }

            LOG_TRACE(log, "Will merge " << temporary_files.files.size() << " temporary files of size "
                << (temporary_files.sum_size_compressed / 1048576.0) << " MiB compressed, "
                << (temporary_files.sum_size_uncompressed / 1048576.0) << " MiB uncompressed.");

            /// NOTE(review): the two trailing `1` arguments are presumably thread counts - confirm against the constructor.
            impl = std::make_unique<MergingAggregatedMemoryEfficientBlockInputStream>(sources, params, final, 1, 1);
        }
        else
        {
            /// Everything fit in memory: convert the single set of aggregated data to blocks.
            ManyAggregatedDataVariants many_data { data_variants };
            impl = aggregator.mergeAndConvertToBlocks(many_data, final, 1);
        }
    }

    if (!isCancelled() && impl)
        return impl->read();

    return Block();
}
/** Builds the reader chain for one temporary file:
  * `file_in` is constructed from the path, `compressed_in` wraps `file_in`,
  * and `block_in` is a NativeBlockInputStream over `compressed_in`,
  * created with the current ClickHouse revision.
  */
AggregatingBlockInputStream::TemporaryFileStream::TemporaryFileStream(const std::string & path)
    : file_in(path)
    , compressed_in(file_in)
    , block_in(std::make_shared<NativeBlockInputStream>(compressed_in, ClickHouseRevision::get()))
{
}
2011-09-19 03:34:23 +00:00
}