More accurate prediction. [#CLICKHOUSE-2902]

This commit is contained in:
Vitaliy Lyudvichenko 2017-04-07 14:43:24 +03:00 committed by alexey-milovidov
parent f519100946
commit c78177a28a
3 changed files with 26 additions and 27 deletions

View File

@ -71,6 +71,9 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
size_t res_block_size_bytes = 0;
bool bytes_exceeded = false;
if (task->size_predictor)
task->size_predictor->startBlock();
if (prewhere_actions)
{
do

View File

@ -127,12 +127,6 @@ MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor(
void MergeTreeBlockSizePredictor::startBlock()
{
if (!is_initialized)
{
is_initialized = true;
//init();
}
block_size_bytes = 0;
block_size_rows = 0;
for (auto & info : dynamic_columns_infos)
@ -140,28 +134,28 @@ void MergeTreeBlockSizePredictor::startBlock()
}
void MergeTreeBlockSizePredictor::update(const Block& block, size_t read_marks)
void MergeTreeBlockSizePredictor::update(const Block & block, size_t read_marks)
{
size_t dif_rows = read_marks * index_granularity;
block_size_rows += dif_rows;
block_size_bytes = block_size_rows * fixed_columns_bytes_per_row;
bytes_per_row_current = fixed_columns_bytes_per_row;
/// Make recursive updates for each read mark
double alpha = std::pow(1. - decay, read_marks);
for (auto & info : dynamic_columns_infos)
{
size_t dif_rows = read_marks * index_granularity;
block_size_rows += dif_rows;
block_size_bytes = block_size_rows * fixed_columns_bytes_per_row;
bytes_per_row_current = fixed_columns_bytes_per_row;
size_t new_size = block.getByName(info.name).column->byteSize();
size_t dif_size = new_size - info.size_bytes;
for (auto & info : dynamic_columns_infos)
{
size_t new_size = block.getByName(info.name).column->byteSize();
size_t dif_size = new_size - info.size_bytes;
double local_bytes_per_row = static_cast<double>(dif_size) / dif_rows;
info.bytes_per_row = alpha * info.bytes_per_row + (1. - alpha) * local_bytes_per_row;
double local_bytes_per_row = static_cast<double>(dif_size) / dif_rows;
/// Make recursive updates for each read mark
for (size_t i = 0; i < read_marks; ++i)
info.bytes_per_row += decay * (local_bytes_per_row - info.bytes_per_row);
info.size_bytes = new_size;
block_size_bytes += new_size;
bytes_per_row_current += info.bytes_per_row;
}
info.size_bytes = new_size;
block_size_bytes += new_size;
bytes_per_row_current += info.bytes_per_row;
}
}
}

View File

@ -80,9 +80,11 @@ struct MergeTreeBlockSizePredictor
}
MergeTreeData::DataPartPtr data_part;
bool is_initialized = false;
size_t index_granularity;
const double decay = 0.75;
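/// Smoothing factor for bytes_per_row: the weight given to the newest sample
/// (a higher value reacts faster to recent data). The update is applied once per read mark.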
static constexpr double decay = 0.5;
struct ColumnInfo
{