Merge pull request #6126 from yandex/fix_index_write_with_adaptive_granularity

Fix secondary indices write with adaptive granularity
alexey-milovidov 2019-07-25 01:16:21 +03:00 committed by GitHub
commit 05ca583f22
4 changed files with 65 additions and 4 deletions
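
For context on the fix: with adaptive granularity every mark can cover a different number of rows, so the writer has to remember which mark the skip indices reached after each written block; the change appears to carry that position in a new skip_index_mark member rather than restarting from mark 0 for every block. Below is a minimal standalone sketch of that idea, not ClickHouse code: GranularityInfo and SkipIndexWriter are made-up names, and skip_index_mark here only mimics the role of the member added in the header diff.

#include <cstddef>
#include <iostream>
#include <vector>

/// Illustrative only: with adaptive granularity each mark may cover a different
/// number of rows, so a writer must carry its current mark across blocks.
struct GranularityInfo
{
    std::vector<size_t> mark_rows;                  /// rows covered by each mark (not constant when adaptive)
    size_t getMarkRows(size_t mark) const { return mark_rows.at(mark); }
};

struct SkipIndexWriter
{
    size_t skip_index_mark = 0;                     /// persists between blocks, like the new member in the diff

    void writeBlock(const GranularityInfo & granularity, size_t rows)
    {
        size_t current_mark = skip_index_mark;      /// resume where the previous block stopped
        size_t pos = 0;
        while (pos < rows)
        {
            size_t limit = granularity.getMarkRows(current_mark);
            std::cout << "block row " << pos << ": mark " << current_mark << " covers " << limit << " rows\n";
            pos += limit;
            ++current_mark;
        }
        skip_index_mark = current_mark;             /// remember progress for the next block
    }
};

int main()
{
    GranularityInfo granularity{{3, 1, 2, 4, 2}};   /// adaptive: varying rows per mark
    SkipIndexWriter writer;
    writer.writeBlock(granularity, 4);              /// consumes marks 0 and 1
    writer.writeBlock(granularity, 6);              /// continues at mark 2; resetting to 0 would misread mark sizes
}

If the mark counter were reset to zero before the second block, the writer would read mark 0's row count again and the skip-index marks would drift from the data marks, which is presumably the failure mode the new test targets with its tiny index_granularity_bytes setting.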


@@ -381,18 +381,18 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
    }
    rows_count += rows;
    {
        /// Creating block for update
        Block indices_update_block(skip_indexes_columns);
        size_t skip_index_current_mark = 0;
        /// Filling and writing skip indices like in IMergedBlockOutputStream::writeColumn
        for (size_t i = 0; i < storage.skip_indices.size(); ++i)
        {
            const auto index = storage.skip_indices[i];
            auto & stream = *skip_indices_streams[i];
            size_t prev_pos = 0;
            size_t skip_index_current_mark = 0;
            skip_index_current_mark = skip_index_mark;
            while (prev_pos < rows)
            {
                UInt64 limit = 0;
@@ -417,6 +417,8 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
                        /// to be compatible with normal .mrk2 file format
                        if (storage.canUseAdaptiveGranularity())
                            writeIntBinary(1UL, stream.marks);
                        ++skip_index_current_mark;
                    }
                }
@@ -435,9 +437,9 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
                    }
                }
                prev_pos = pos;
                ++skip_index_current_mark;
            }
        }
        skip_index_mark = skip_index_current_mark;
    }
    {


@@ -68,6 +68,7 @@ private:
    String part_path;
    size_t rows_count = 0;
    size_t skip_index_mark = 0;
    std::unique_ptr<WriteBufferFromFile> index_file_stream;
    std::unique_ptr<HashingWriteBuffer> index_stream;


@@ -0,0 +1,56 @@
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS indexed_table;
CREATE TABLE indexed_table
(
    `tm` DateTime,
    `log_message` String,
    INDEX log_message log_message TYPE tokenbf_v1(4096, 2, 0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (tm)
SETTINGS index_granularity_bytes = 50;
INSERT INTO indexed_table SELECT toDateTime('2019-05-27 10:00:00') + number % 100, 'h' FROM numbers(1000);
INSERT INTO indexed_table
SELECT
toDateTime('2019-05-27 10:00:00') + number % 100,
concat('hhhhhhhhhhhhhhhhhhhhhhhhh', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'yyyyyyyyyyyyyyyyyyyyyyyyyy', toString(rand()))
FROM numbers(1000);
OPTIMIZE TABLE indexed_table FINAL;
SELECT COUNT() FROM indexed_table WHERE log_message like '%x%';
DROP TABLE IF EXISTS indexed_table;
DROP TABLE IF EXISTS another_indexed_table;
CREATE TABLE another_indexed_table
(
    `tm` DateTime,
    `log_message` String,
    INDEX log_message log_message TYPE tokenbf_v1(4096, 2, 0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (tm)
SETTINGS index_granularity_bytes = 50,
    vertical_merge_algorithm_min_rows_to_activate=0,
    vertical_merge_algorithm_min_columns_to_activate=0;
INSERT INTO another_indexed_table SELECT toDateTime('2019-05-27 10:00:00') + number % 100, 'h' FROM numbers(1000);
INSERT INTO another_indexed_table
SELECT
toDateTime('2019-05-27 10:00:00') + number % 100,
concat('hhhhhhhhhhhhhhhhhhhhhhhhh', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'yyyyyyyyyyyyyyyyyyyyyyyyyy', toString(rand()))
FROM numbers(1000);
OPTIMIZE TABLE another_indexed_table FINAL;
SELECT COUNT() FROM another_indexed_table WHERE log_message like '%x%';
DROP TABLE IF EXISTS another_indexed_table;