Merge pull request #6126 from yandex/fix_index_write_with_adaptive_granularity

Fix secondary indices write with adaptive granularity
alexey-milovidov 2019-07-25 01:16:21 +03:00 committed by GitHub
commit 05ca583f22
4 changed files with 65 additions and 4 deletions
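
For context on the fix: with adaptive granularity every mark can cover a different number of rows, so the writer has to remember which mark the skip indices reached after each written block; the change appears to carry that position in a new skip_index_mark member rather than restarting from mark 0 for every block. Below is a minimal standalone sketch of that idea, not ClickHouse code: GranularityInfo and SkipIndexWriter are made-up names, and skip_index_mark here only mimics the role of the member added in the header diff.

#include <cstddef>
#include <iostream>
#include <vector>

/// Illustrative only: with adaptive granularity each mark may cover a different
/// number of rows, so a writer must carry its current mark across blocks.
struct GranularityInfo
{
    std::vector<size_t> mark_rows;                  /// rows covered by each mark (not constant when adaptive)
    size_t getMarkRows(size_t mark) const { return mark_rows.at(mark); }
};

struct SkipIndexWriter
{
    size_t skip_index_mark = 0;                     /// persists between blocks, like the new member in the diff

    void writeBlock(const GranularityInfo & granularity, size_t rows)
    {
        size_t current_mark = skip_index_mark;      /// resume where the previous block stopped
        size_t pos = 0;
        while (pos < rows)
        {
            size_t limit = granularity.getMarkRows(current_mark);
            std::cout << "block row " << pos << ": mark " << current_mark << " covers " << limit << " rows\n";
            pos += limit;
            ++current_mark;
        }
        skip_index_mark = current_mark;             /// remember progress for the next block
    }
};

int main()
{
    GranularityInfo granularity{{3, 1, 2, 4, 2}};   /// adaptive: varying rows per mark
    SkipIndexWriter writer;
    writer.writeBlock(granularity, 4);              /// consumes marks 0 and 1
    writer.writeBlock(granularity, 6);              /// continues at mark 2; resetting to 0 would misread mark sizes
}

If the mark counter were reset to zero before the second block, the writer would read mark 0's row count again and the skip-index marks would drift from the data marks, which is presumably the failure mode the new test targets with its tiny index_granularity_bytes setting.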


@@ -381,18 +381,18 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
    }
    rows_count += rows;
    {
        /// Creating block for update
        Block indices_update_block(skip_indexes_columns);
        size_t skip_index_current_mark = 0;
        /// Filling and writing skip indices like in IMergedBlockOutputStream::writeColumn
        for (size_t i = 0; i < storage.skip_indices.size(); ++i)
        {
            const auto index = storage.skip_indices[i];
            auto & stream = *skip_indices_streams[i];
            size_t prev_pos = 0;
            size_t skip_index_current_mark = 0;
            skip_index_current_mark = skip_index_mark;
            while (prev_pos < rows)
            {
                UInt64 limit = 0;
@@ -417,6 +417,8 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
                        /// to be compatible with normal .mrk2 file format
                        if (storage.canUseAdaptiveGranularity())
                            writeIntBinary(1UL, stream.marks);
                        ++skip_index_current_mark;
                    }
                }
@@ -435,9 +437,9 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
                    }
                }
                prev_pos = pos;
                ++skip_index_current_mark;
            }
        }
        skip_index_mark = skip_index_current_mark;
    }
    {


@@ -68,6 +68,7 @@ private:
    String part_path;
    size_t rows_count = 0;
    size_t skip_index_mark = 0;
    std::unique_ptr<WriteBufferFromFile> index_file_stream;
    std::unique_ptr<HashingWriteBuffer> index_stream;


@@ -0,0 +1,56 @@
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS indexed_table;
CREATE TABLE indexed_table
(
    `tm` DateTime,
    `log_message` String,
    INDEX log_message log_message TYPE tokenbf_v1(4096, 2, 0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (tm)
SETTINGS index_granularity_bytes = 50;
INSERT INTO indexed_table SELECT toDateTime('2019-05-27 10:00:00') + number % 100, 'h' FROM numbers(1000);
INSERT INTO indexed_table
SELECT
toDateTime('2019-05-27 10:00:00') + number % 100,
concat('hhhhhhhhhhhhhhhhhhhhhhhhh', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'yyyyyyyyyyyyyyyyyyyyyyyyyy', toString(rand()))
FROM numbers(1000);
OPTIMIZE TABLE indexed_table FINAL;
SELECT COUNT() FROM indexed_table WHERE log_message like '%x%';
DROP TABLE IF EXISTS indexed_table;
DROP TABLE IF EXISTS another_indexed_table;
CREATE TABLE another_indexed_table
(
    `tm` DateTime,
    `log_message` String,
    INDEX log_message log_message TYPE tokenbf_v1(4096, 2, 0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (tm)
SETTINGS index_granularity_bytes = 50,
    vertical_merge_algorithm_min_rows_to_activate=0,
    vertical_merge_algorithm_min_columns_to_activate=0;
INSERT INTO another_indexed_table SELECT toDateTime('2019-05-27 10:00:00') + number % 100, 'h' FROM numbers(1000);
INSERT INTO another_indexed_table
SELECT
toDateTime('2019-05-27 10:00:00') + number % 100,
concat('hhhhhhhhhhhhhhhhhhhhhhhhh', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'yyyyyyyyyyyyyyyyyyyyyyyyyy', toString(rand()))
FROM numbers(1000);
OPTIMIZE TABLE another_indexed_table FINAL;
SELECT COUNT() FROM another_indexed_table WHERE log_message like '%x%';
DROP TABLE IF EXISTS another_indexed_table;