Fix bad size of marks

alesapin 2019-08-30 17:29:08 +03:00
parent 5851316742
commit f6120558df
6 changed files with 47 additions and 9 deletions

View File

@@ -333,7 +333,7 @@ void IMergedBlockOutputStream::calculateAndSerializeSkipIndices(
{
/// Creating block for update
Block indices_update_block(skip_indexes_columns);
- size_t skip_index_current_mark = 0;
+ size_t skip_index_current_data_mark = 0;
/// Filling and writing skip indices like in IMergedBlockOutputStream::writeColumn
for (size_t i = 0; i < skip_indices.size(); ++i)
@@ -341,7 +341,7 @@ void IMergedBlockOutputStream::calculateAndSerializeSkipIndices(
const auto index = skip_indices[i];
auto & stream = *skip_indices_streams[i];
size_t prev_pos = 0;
- skip_index_current_mark = skip_index_mark;
+ skip_index_current_data_mark = skip_index_data_mark;
while (prev_pos < rows)
{
UInt64 limit = 0;
@@ -351,7 +351,7 @@ void IMergedBlockOutputStream::calculateAndSerializeSkipIndices(
}
else
{
- limit = index_granularity.getMarkRows(skip_index_current_mark);
+ limit = index_granularity.getMarkRows(skip_index_current_data_mark);
if (skip_indices_aggregators[i]->empty())
{
skip_indices_aggregators[i] = index->createIndexAggregator();
@@ -366,9 +366,9 @@ void IMergedBlockOutputStream::calculateAndSerializeSkipIndices(
/// to be compatible with normal .mrk2 file format
if (can_use_adaptive_granularity)
writeIntBinary(1UL, stream.marks);
- ++skip_index_current_mark;
}
+ /// this mark is aggregated, go to the next one
+ skip_index_current_data_mark++;
}
size_t pos = prev_pos;
@@ -388,7 +388,7 @@ void IMergedBlockOutputStream::calculateAndSerializeSkipIndices(
prev_pos = pos;
}
}
- skip_index_mark = skip_index_current_mark;
+ skip_index_data_mark = skip_index_current_data_mark;
}
void IMergedBlockOutputStream::finishSkipIndicesSerialization(
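Why the rename matters: the aggregation loop above consumes the block granule by granule, and the row limit for each step comes from index_granularity.getMarkRows(), which is keyed by the *data* mark number. A skip index declared with GRANULARITY > 1 emits one index mark per several data marks, so reusing the skip-index mark counter as the lookup key returned row counts for the wrong granules. Below is a minimal self-contained sketch of the corrected loop shape (illustrative names, not the ClickHouse sources):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    /// Adaptive granularity: each data mark may hold a different number of rows.
    std::vector<size_t> rows_per_data_mark = {7, 7, 7, 3, 3, 3};
    const size_t rows = 30;   /// rows in the block being serialized
    size_t data_mark = 0;     /// plays the role of skip_index_current_data_mark
    size_t pos = 0;

    while (pos < rows)
    {
        /// The limit for this step is the size of the current *data* granule,
        /// so the counter that indexes getMarkRows() must track data marks.
        const size_t limit = rows_per_data_mark[data_mark];
        std::cout << "aggregate rows [" << pos << ", " << pos + limit << ")\n";
        pos += limit;
        ++data_mark; /// this granule is aggregated, go to the next one
    }
    return 0;
}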

View File

@@ -141,7 +141,10 @@ protected:
size_t aio_threshold;
size_t current_mark = 0;
size_t skip_index_mark = 0;
+ /// Number of the data mark from which skip indices have to start
+ /// aggregation, i.e. it's a data mark number, not a skip index mark.
+ size_t skip_index_data_mark = 0;
const bool can_use_adaptive_granularity;
const std::string marks_file_extension;
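The new comment draws the distinction this commit hinges on: with an index declared as GRANULARITY g, one skip-index mark aggregates g data granules, so the two counters advance at different rates and cannot substitute for each other. A tiny sketch of the relation (hypothetical helper name, for illustration only):

#include <cstddef>

/// Hypothetical helper: maps a data mark to the skip-index mark covering it,
/// assuming the index was declared with GRANULARITY g.
size_t indexMarkFor(size_t data_mark, size_t g)
{
    return data_mark / g; /// e.g. g = 4: data marks 0..3 all belong to index mark 0
}

int main()
{
    return indexMarkFor(5, 4) == 1 ? 0 : 1; /// data mark 5 falls into index mark 1
}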

View File

@@ -332,7 +332,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
else if (skip_indexes_column_name_to_position.end() != skip_index_column_it)
{
const auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column;
- writeColumn(column.name, *column.type, index_column, offset_columns, false, serialization_states[i], current_mark);
+ std::tie(std::ignore, new_index_offset) = writeColumn(column.name, *column.type, index_column, offset_columns, false, serialization_states[i], current_mark);
}
else
{
@@ -349,6 +349,8 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
rows_count += rows;
+ /// Should be written before the index offset update, because we calculate
+ /// indices of currently written granules
calculateAndSerializeSkipIndices(skip_indexes_columns, rows);
{
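The one-line change in the first hunk is the substance of the fix in this file: previously the pair returned by writeColumn() was discarded for skip-index columns, leaving new_index_offset stale when such a column happened to be written last. A simplified sketch of the capture pattern (stub signature assumed for illustration):

#include <cstddef>
#include <tuple>
#include <utility>

/// Stub with the same result shape as writeColumn:
/// {new_current_mark, new_index_offset}.
std::pair<size_t, size_t> writeColumnStub()
{
    return {5, 42};
}

int main()
{
    size_t new_index_offset = 0;
    /// Keep the offset, ignore the mark number - exactly what the fixed line does.
    std::tie(std::ignore, new_index_offset) = writeColumnStub();
    return new_index_offset == 42 ? 0 : 1;
}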

View File

@@ -68,7 +68,6 @@ void MergedColumnOnlyOutputStream::write(const Block & block)
if (!rows)
return;
- calculateAndSerializeSkipIndices(skip_indexes_columns, rows);
size_t new_index_offset = 0;
size_t new_current_mark = 0;
@@ -79,6 +78,10 @@ void MergedColumnOnlyOutputStream::write(const Block & block)
std::tie(new_current_mark, new_index_offset) = writeColumn(column.name, *column.type, *column.column, offset_columns, skip_offsets, serialization_states[i], current_mark);
}
+ /// Should be written before the index offset update, because we calculate
+ /// indices of currently written granules
+ calculateAndSerializeSkipIndices(skip_indexes_columns, rows);
index_offset = new_index_offset;
current_mark = new_current_mark;
}
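Here the call is moved rather than added: skip indices used to be serialized before the columns were written; now the call sits after the writeColumn() loop but before index_offset and current_mark are updated, so it still observes the mark state describing the granules this write() produced. An ordering sketch (stand-in type, not the real classes):

#include <cstddef>
#include <iostream>

struct OutputStreamSketch
{
    size_t current_mark = 0;
    size_t index_offset = 0;

    void write(size_t new_current_mark, size_t new_index_offset)
    {
        /// 1. Write the column data (elided).
        /// 2. Serialize skip indices while current_mark / index_offset
        ///    still describe the granules just written.
        std::cout << "skip indices serialized at mark " << current_mark << "\n";
        /// 3. Only now advance the bookkeeping.
        current_mark = new_current_mark;
        index_offset = new_index_offset;
    }
};

int main()
{
    OutputStreamSketch s;
    s.write(2, 10);
    return 0;
}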

View File

@@ -0,0 +1,28 @@
+ SET allow_experimental_data_skipping_indices=1;
+ DROP TABLE IF EXISTS bad_skip_idx;
+ CREATE TABLE bad_skip_idx
+ (
+     id UInt64,
+     value String
+ ) ENGINE MergeTree()
+ ORDER BY id SETTINGS index_granularity_bytes = 64, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0; -- actually vertical merge is not a required condition for this bug, but it makes the bug easier to reproduce (because we don't recalculate granularities)
+ -- 7 rows per granule
+ INSERT INTO bad_skip_idx SELECT number, concat('x', toString(number)) FROM numbers(1000);
+ -- 3 rows per granule
+ INSERT INTO bad_skip_idx SELECT number, concat('xxxxxxxxxx', toString(number)) FROM numbers(1000,1000);
+ SELECT COUNT(*) FROM bad_skip_idx WHERE value = 'xxxxxxxxxx1015'; -- check no exception
+ INSERT INTO bad_skip_idx SELECT number, concat('x', toString(number)) FROM numbers(1000);
+ ALTER TABLE bad_skip_idx ADD INDEX idx value TYPE bloom_filter(0.01) GRANULARITY 4;
+ OPTIMIZE TABLE bad_skip_idx FINAL;
+ SELECT COUNT(*) FROM bad_skip_idx WHERE value = 'xxxxxxxxxx1015'; -- check no exception
+ DROP TABLE IF EXISTS bad_skip_idx;
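The regression test builds one table from parts with different adaptive granularities: under index_granularity_bytes = 64, the short 'x<N>' rows pack about seven to a granule while the longer 'xxxxxxxxxx<N>' rows pack about three. OPTIMIZE ... FINAL then merges these parts (vertically, thanks to the settings) while materializing the bloom_filter index added by ALTER, and before the fix the skip-index marks file came out with the wrong size. The SELECTs only assert that reading no longer throws. A conceptual sketch of the kind of consistency check that failed (assumed form and message, not the exact ClickHouse code):

#include <cstddef>
#include <stdexcept>
#include <string>

/// Hypothetical check: reject a part whose skip-index marks file size
/// disagrees with the number of marks implied by the data.
void checkMarksSize(size_t marks_file_bytes, size_t expected_marks, size_t bytes_per_mark)
{
    if (marks_file_bytes != expected_marks * bytes_per_mark)
        throw std::runtime_error(
            "Bad size of marks file: " + std::to_string(marks_file_bytes)
            + " bytes, expected " + std::to_string(expected_marks * bytes_per_mark));
}

int main()
{
    checkMarksSize(24, 3, 8); /// consistent sizes: no throw
    return 0;
}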