From 16eaaba31342404c4af21c881bfcc0fb17700b6e Mon Sep 17 00:00:00 2001 From: Anton Kvasha Date: Wed, 6 May 2020 19:21:51 +0300 Subject: [PATCH] Polygons orderer by area --- dbms/src/Dictionaries/PolygonDictionary.cpp | 19 ++++++++++-- dbms/src/Dictionaries/PolygonDictionary.h | 1 - .../PolygonDictionaryImplementations.cpp | 29 +++---------------- .../Dictionaries/PolygonDictionaryUtils.cpp | 10 +++---- .../src/Dictionaries/PolygonDictionaryUtils.h | 4 --- 5 files changed, 26 insertions(+), 37 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 3f3404cc9e1..b612defc52c 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -222,12 +222,27 @@ void IPolygonDictionary::loadData() blockToAttributes(block); stream->readSuffix(); + std::vector areas; areas.reserve(polygons.size()); - for (auto & polygon : polygons) - { + + std::vector> polygon_ids; + polygon_ids.reserve(polygons.size()); + for (size_t i = 0; i < polygons.size(); ++i) { + auto & polygon = polygons[i]; bg::correct(polygon); areas.push_back(bg::area(polygon)); + polygon_ids.emplace_back(polygon, i); } + sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs) { + return areas[lhs.second] < areas[rhs.second]; + }); + std::vector correct_ids; + for (size_t i = 0; i < polygon_ids.size(); ++i) { + auto & polygon = polygon_ids[i]; + correct_ids.emplace_back(ids[polygon.second]); + polygons[i] = polygon.first; + } + ids = correct_ids; } void IPolygonDictionary::calculateBytesAllocated() diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index a11bb88700c..1951bef2de1 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -193,7 +193,6 @@ protected: virtual bool find(const Point & point, size_t & id) const = 0; std::vector polygons; - std::vector areas; /** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon * corresponding to the row in which any other attributes for this entry are located. */ diff --git a/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp b/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp index 55f9e2a5507..c85a1bf3ca2 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp +++ b/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp @@ -39,18 +39,13 @@ std::shared_ptr SimplePolygonDictionary::clone() const bool SimplePolygonDictionary::find(const Point &point, size_t & id) const { bool found = false; - double area = 0; for (size_t i = 0; i < polygons.size(); ++i) { if (bg::covered_by(point, polygons[i])) { - double new_area = areas[i]; - if (!found || new_area < area) - { - found = true; - id = i; - area = new_area; - } + id = i; + found = true; + break; } } return found; @@ -65,16 +60,7 @@ GridPolygonDictionary::GridPolygonDictionary( InputType input_type_, PointType point_type_): IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_, input_type_, point_type_), - grid(kMinIntersections, kMaxDepth, polygons) -{ - std::vector order(polygons.size()); - std::iota(order.begin(), order.end(), 0); - std::sort(order.begin(), order.end(), [&](auto lhs, auto rhs) - { - return areas[lhs] < areas[rhs]; - }); - grid.init(order); -} + grid(kMinIntersections, kMaxDepth, polygons) {} std::shared_ptr GridPolygonDictionary::clone() const { @@ -119,13 +105,6 @@ SmartPolygonDictionary::SmartPolygonDictionary( : IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_, input_type_, point_type_), grid(kMinIntersections, kMaxDepth, polygons) { - std::vector order(polygons.size()); - std::iota(order.begin(), order.end(), 0); - std::sort(order.begin(), order.end(), [&](auto lhs, auto rhs) - { - return areas[lhs] < areas[rhs]; - }); - grid.init(order); auto log = &Logger::get("BucketsPolygonIndex"); buckets.reserve(polygons.size()); for (size_t i = 0; i < polygons.size(); ++i) diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp index 12a2f1edb56..841bf417f30 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp +++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace DB { @@ -38,12 +39,11 @@ const FinalCell * DividedCell::find(Float64 x, Float64 y) const } GridRoot::GridRoot(const size_t min_intersections_, const size_t max_depth_, const std::vector & polygons_): -kMinIntersections(min_intersections_), kMaxDepth(max_depth_), polygons(polygons_) {} - -void GridRoot::init(const std::vector & order_) -{ +kMinIntersections(min_intersections_), kMaxDepth(max_depth_), polygons(polygons_) { setBoundingBox(); - root = makeCell(min_x, min_y, max_x, max_y, order_); + std::vector order(polygons.size()); + std::iota(order.begin(), order.end(), 0); + root = makeCell(min_x, min_y, max_x, max_y, order); } const FinalCell * GridRoot::find(Float64 x, Float64 y) const diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.h b/dbms/src/Dictionaries/PolygonDictionaryUtils.h index 0587f3a5051..ec08778e241 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryUtils.h +++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.h @@ -60,10 +60,6 @@ class GridRoot : public ICell { public: GridRoot(size_t min_intersections_, size_t max_depth_, const std::vector & polygons_); - /** Initializes and builds the grid, saving the intersecting polygons for each cell accordingly. - * The order of indexes is always a subsequence of the order specified in this function call. - */ - void init(const std::vector & order_); /** Retrieves the cell containing a given point. * A null pointer is returned when the point falls outside the grid. */