From b1bd34f66e82173bfc48c7e1a612a967562fcbc6 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 4 Apr 2024 20:25:49 +0000 Subject: [PATCH 1/5] fix --- src/Processors/QueryPlan/PartsSplitter.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 2af1bcb0260..ec51875587e 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -125,14 +126,18 @@ int compareValues(const Values & lhs, const Values & rhs) class IndexAccess { public: - explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { } + explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) + { + for (const auto & part : parts) + loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size()); + } Values getValue(size_t part_idx, size_t mark) const { const auto & index = parts[part_idx].data_part->getIndex(); - size_t size = index.size(); - Values values(size); - for (size_t i = 0; i < size; ++i) + chassert(index.size() >= loaded_columns); + Values values(loaded_columns); + for (size_t i = 0; i < loaded_columns; ++i) { index[i]->get(mark, values[i]); if (values[i].isNull()) @@ -199,6 +204,7 @@ public: } private: const RangesInDataParts & parts; + size_t loaded_columns = std::numeric_limits::max(); }; class RangesInDataPartsBuilder From 6be747bf32a7f1fcd9fee8f86c72dd2b03e48c02 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 4 Apr 2024 22:28:29 +0000 Subject: [PATCH 2/5] add test --- .../__init__.py | 0 .../test.py | 47 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py diff --git 
a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py new file mode 100644 index 00000000000..e710b9942dc --- /dev/null +++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py @@ -0,0 +1,47 @@ +import pytest +import logging + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", stay_alive=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_simple_query_after_restart(start_cluster): + node.query( + """ + create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + + insert into t select 42, number from numbers_mt(100); + insert into t select number, number from numbers_mt(100); + """ + ) + + node.restart_clickhouse() + + assert ( + int( + node.query( + "select count() from t where not ignore(*)", + settings={ + "max_threads": 4, + "merge_tree_min_bytes_for_concurrent_read": 1, + "merge_tree_min_rows_for_concurrent_read": 1, + "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1, + }, + ) + ) + == 200 + ) From 54ceb3d32a7bb490ba7f202a511607f0ea21ae5b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 12:47:00 +0000 Subject: [PATCH 3/5] add some comments --- src/Processors/QueryPlan/PartsSplitter.cpp | 2 ++ .../test_final_bug_with_pk_columns_loading/test.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 
ec51875587e..64af48dd53c 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -128,6 +128,8 @@ class IndexAccess public: explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { + /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`) + /// and we need to use the same set of index columns across all parts. for (const auto & part : parts) loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size()); } diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py index e710b9942dc..61559913e05 100644 --- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py +++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py @@ -19,18 +19,24 @@ def start_cluster(): cluster.shutdown() -def test_simple_query_after_restart(start_cluster): +def test_simple_query_after_index_reload(start_cluster): node.query( """ create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + -- for this part the first columns is useless, so we have to use both insert into t select 42, number from numbers_mt(100); + + -- for this part the first columns is enough insert into t select number, number from numbers_mt(100); """ ) + # force reloading index node.restart_clickhouse() + # the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. 
even a simple count query is enough, + # because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) assert ( int( node.query( From 39d706ba9f0c8e7f8c8d757e215f639f7d510fe2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 14:45:51 +0000 Subject: [PATCH 4/5] rework test --- .../__init__.py | 0 .../test.py | 53 ------------------- ...s_splitter_bug_and_index_loading.reference | 1 + ...3_parts_splitter_bug_and_index_loading.sql | 17 ++++++ 4 files changed, 18 insertions(+), 53 deletions(-) delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py deleted file mode 100644 index 61559913e05..00000000000 --- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py +++ /dev/null @@ -1,53 +0,0 @@ -import pytest -import logging - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", stay_alive=True) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - logging.info("Starting cluster...") - cluster.start() - logging.info("Cluster started") - - yield cluster - finally: - cluster.shutdown() - - -def test_simple_query_after_index_reload(start_cluster): - node.query( - """ - create table t(a UInt32, b UInt32) 
engine=MergeTree order by (a, b) settings index_granularity=1; - - -- for this part the first columns is useless, so we have to use both - insert into t select 42, number from numbers_mt(100); - - -- for this part the first columns is enough - insert into t select number, number from numbers_mt(100); - """ - ) - - # force reloading index - node.restart_clickhouse() - - # the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. even a simple count query is enough, - # because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) - assert ( - int( - node.query( - "select count() from t where not ignore(*)", - settings={ - "max_threads": 4, - "merge_tree_min_bytes_for_concurrent_read": 1, - "merge_tree_min_rows_for_concurrent_read": 1, - "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1, - }, - ) - ) - == 200 - ) diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference new file mode 100644 index 00000000000..08839f6bb29 --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference @@ -0,0 +1 @@ +200 diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql new file mode 100644 index 00000000000..541ac67fd24 --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql @@ -0,0 +1,17 @@ +create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + +-- for this part the first columns is useless, so we have to use both +insert into t select 42, number from numbers_mt(100); + +-- for this part the first columns is enough +insert into t select number, number from 
numbers_mt(100); + +-- force reloading index +detach table t; +attach table t; + +set merge_tree_min_bytes_for_concurrent_read=1, merge_tree_min_rows_for_concurrent_read=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=1.0, max_threads=4; + +-- the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. Even a simple count query is enough, +-- because some granules were assigned to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) +select count() from t where not ignore(*); From 378d330d9dfa289c413f80c2addaf6dee5503093 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 17:07:43 +0000 Subject: [PATCH 5/5] better --- .../0_stateless/03033_parts_splitter_bug_and_index_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql index 541ac67fd24..25ec1c8fd80 100644 --- a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql @@ -1,5 +1,7 @@ create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; +system stop merges t; + -- for this part the first columns is useless, so we have to use both insert into t select 42, number from numbers_mt(100);