From 8da36da3a9d9fe122720c1f0e3be024d5b6b02e8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 8 Oct 2024 18:07:48 +0000 Subject: [PATCH] Backport #70374 to 24.8: Fix crash during GROUP BY JSON sub-object subcolumn --- src/Columns/ColumnObject.cpp | 13 +++++++++++++ src/Columns/ColumnObject.h | 1 + src/DataTypes/DataTypeObject.cpp | 10 +++------- ...250_json_group_by_sub_object_subcolumn.reference | 2 ++ .../03250_json_group_by_sub_object_subcolumn.sql | 10 ++++++++++ 5 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.reference create mode 100644 tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.sql diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 70eed4788fe..b962887f7b5 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -323,6 +323,19 @@ void ColumnObject::setDynamicPaths(const std::vector & paths) } } +void ColumnObject::setDynamicPaths(const std::vector> & paths) +{ + if (paths.size() > max_dynamic_paths) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set dynamic paths to Object column, the number of paths ({}) exceeds the limit ({})", paths.size(), max_dynamic_paths); + + for (const auto & [path, column] : paths) + { + auto it = dynamic_paths.emplace(path, column).first; + dynamic_paths_ptrs[path] = assert_cast(it->second.get()); + sorted_dynamic_paths.insert(it->first); + } +} + void ColumnObject::insert(const Field & x) { const auto & object = x.safeGet(); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 9c5ee4fa089..a2f9964a243 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -222,6 +222,7 @@ public: void addNewDynamicPath(std::string_view path); void setDynamicPaths(const std::vector & paths); + void setDynamicPaths(const std::vector> & paths); void setMaxDynamicPaths(size_t max_dynamic_paths_); void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index e5c3f795495..b4462287583 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -348,17 +348,13 @@ std::unique_ptr DataTypeObject::getDynamicSubcolu result_typed_columns[getSubPath(path, prefix)] = column; } - auto & result_dynamic_columns = result_object_column.getDynamicPaths(); - auto & result_dynamic_columns_ptrs = result_object_column.getDynamicPathsPtrs(); + std::vector> result_dynamic_paths; for (const auto & [path, column] : object_column.getDynamicPaths()) { if (path.starts_with(prefix) && path.size() != prefix.size()) - { - auto sub_path = getSubPath(path, prefix); - result_dynamic_columns[sub_path] = column; - result_dynamic_columns_ptrs[sub_path] = assert_cast(result_dynamic_columns[sub_path].get()); - } + result_dynamic_paths.emplace_back(getSubPath(path, prefix), column); } + result_object_column.setDynamicPaths(result_dynamic_paths); const auto & shared_data_offsets = object_column.getSharedDataOffsets(); const auto [shared_data_paths, shared_data_values] = object_column.getSharedDataPathsAndValues(); diff --git a/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.reference b/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.reference new file mode 100644 index 00000000000..4f221a67e58 --- /dev/null +++ b/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.reference @@ -0,0 +1,2 @@ +{"b":{"c":0,"d":"0","e":"str_0"}} +{"b":{"c":0,"d":"1","e":"str_1"}} diff --git a/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.sql b/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.sql new file mode 100644 index 00000000000..7955e2bfe8b --- /dev/null +++ b/tests/queries/0_stateless/03250_json_group_by_sub_object_subcolumn.sql @@ -0,0 +1,10 @@ +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (json JSON(max_dynamic_paths = 20, `a.b.c` UInt32)) engine = Memory; +insert into test select toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(2); +select json.^a from test group by json.^a order by toString(json.^a); +drop table test; +