From c1ec72df0b8fa5af8dcabb84fd81388161921c16 Mon Sep 17 00:00:00 2001
From: Andrey Chulkov
Date: Thu, 7 May 2020 00:28:29 +0300
Subject: [PATCH] trying some improvements

---
 contrib/base64                                |   2 +-
 contrib/poco                                  |   2 +-
 .../Dictionaries/PolygonDictionaryUtils.cpp   | 258 ++++++++++++++++++
 .../src/Dictionaries/PolygonDictionaryUtils.h |  66 +++++
 4 files changed, 326 insertions(+), 2 deletions(-)

diff --git a/contrib/base64 b/contrib/base64
index 95ba56a9b04..5257626d2be 160000
--- a/contrib/base64
+++ b/contrib/base64
@@ -1 +1 @@
-Subproject commit 95ba56a9b041f9933f5cd2bbb2ee4e083468c20a
+Subproject commit 5257626d2be17a3eb23f79be17fe55ebba394ad2
diff --git a/contrib/poco b/contrib/poco
index 860574c9398..1f3e4638f25 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 860574c93980d887a89df141edd9ca2fb0024fa3
+Subproject commit 1f3e4638f250ad4d028a2499af20d4185463e07d
diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp
index 4e257a7cfe4..06ce4693bdb 100644
--- a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp
+++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp
@@ -421,4 +421,262 @@ bool BucketsPolygonIndex::find(const Point & point, size_t & id) const
     return found;
 }
 
+BucketsSinglePolygonIndex::BucketsSinglePolygonIndex(
+    const Polygon & polygon)
+    : sorted_x(uniqueX(polygon))
+{
+    indexBuild(polygon);
+}
+
+std::vector<Float64> BucketsSinglePolygonIndex::uniqueX(const Polygon & polygon)
+{
+    std::vector<Float64> all_x;
+
+    for (const auto & point : polygon.outer())
+    {
+        all_x.push_back(point.x());
+    }
+
+    for (const auto & inner : polygon.inners())
+    {
+        for (const auto & point : inner)
+        {
+            all_x.push_back(point.x());
+        }
+    }
+
+    /** make all_x sorted and distinct */
+    std::sort(all_x.begin(), all_x.end());
+    all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end());
+
+    return all_x;
+}
+
+void BucketsSinglePolygonIndex::indexBuild(const Polygon & polygon)
+{
+    indexAddRing(polygon.outer());
+
+    for (const auto & inner : polygon.inners())
+    {
+        indexAddRing(inner);
+    }
+
+    /** sort edges by (left_point, right_point), then number them in that order: all_edges[i].edge_id == i */
+    std::sort(this->all_edges.begin(), this->all_edges.end(), Edge::compare1);
+    for (size_t i = 0; i != this->all_edges.size(); ++i)
+    {
+        this->all_edges[i].edge_id = i;
+    }
+
+    /** total number of edges */
+    size_t m = this->all_edges.size();
+
+    /** using custom comparator for fetching edges in right_point order, like in scanline */
+    auto cmp = [](const Edge & a, const Edge & b)
+    {
+        return Edge::compare2(a, b);
+    };
+    std::set<Edge, decltype(cmp)> interesting_edges(cmp);
+
+    /** size of index: number of ranges between adjacent distinct x coordinates (0 if there are none) */
+    size_t n = 0;
+    if (!this->sorted_x.empty())
+    {
+        n = this->sorted_x.size() - 1;
+    }
+    this->edges_index_tree.resize(2 * n);
+
+    /** For every edge id: half-open range [edge_left, edge_right) of x ranges it is attached to; n means "not set" */
+    std::vector<size_t> edge_left(m, n), edge_right(m, n);
+
+    /** position of the next edge (in left_point order) that has not been added to interesting_edges yet */
+    size_t edges_it = 0;
+    for (size_t l = 0, r = 1; r < this->sorted_x.size(); ++l, ++r)
+    {
+        const Float64 lx = this->sorted_x[l];
+        const Float64 rx = this->sorted_x[r];
+
+        /** removing edges where right_point.x < lx */
+        while (!interesting_edges.empty() && interesting_edges.begin()->r.x() < lx)
+        {
+            edge_right[interesting_edges.begin()->edge_id] = l;
+            interesting_edges.erase(interesting_edges.begin());
+        }
+
+        /** adding edges where left_point.x <= rx */
+        for (; edges_it < this->all_edges.size() && this->all_edges[edges_it].l.x() <= rx; ++edges_it)
+        {
+            interesting_edges.insert(this->all_edges[edges_it]);
+            edge_left[this->all_edges[edges_it].edge_id] = l;
+        }
+    }
+
+    for (size_t i = 0; i != this->all_edges.size(); ++i)
+    {
+        size_t l = edge_left[i];
+        size_t r = edge_right[i];
+        if (l == n)
+        {
+            continue;
+        }
+
+        /** adding [l, r) to the segment tree (leaves are stored at positions n .. 2n - 1) */
+        for (l += n, r += n; l < r; l >>= 1, r >>= 1)
+        {
+            if (l & 1)
+            {
+                /** l is a right child: its node lies fully inside [l, r), take it and step to the right */
+                this->edges_index_tree[l++].push_back(i);
+            }
+            if (r & 1)
+            {
+                /** r is a right child: its left sibling r - 1 is the last node inside [l, r) */
+                this->edges_index_tree[--r].push_back(i);
+            }
+        }
+    }
+
+}
+
+void BucketsSinglePolygonIndex::indexAddRing(const Ring & ring)
+{
+    for (size_t i = 0, prev = ring.size() - 1; i < ring.size(); prev = i, ++i)
+    {
+        Point a = ring[prev];
+        Point b = ring[i];
+
+        /** orient the edge: a.x <= b.x, and a.y <= b.y when the edge is vertical */
+        if (a.x() > b.x())
+        {
+            std::swap(a, b);
+        }
+
+        if (a.x() == b.x() && a.y() > b.y())
+        {
+            std::swap(a, b);
+        }
+
+        this->all_edges.emplace_back(Edge{a, b, 0});
+    }
+}
+
+bool BucketsSinglePolygonIndex::Edge::compare1(const Edge & a, const Edge & b)
+{
+    /** comparing left point */
+    if (a.l.x() != b.l.x())
+    {
+        return a.l.x() < b.l.x();
+    }
+    if (a.l.y() != b.l.y())
+    {
+        return a.l.y() < b.l.y();
+    }
+
+    /** comparing right point */
+    if (a.r.x() != b.r.x())
+    {
+        return a.r.x() < b.r.x();
+    }
+    if (a.r.y() != b.r.y())
+    {
+        return a.r.y() < b.r.y();
+    }
+    /** equal end points: order by edge_id so that compare1(a, a) is false, as std::sort requires */
+    return a.edge_id < b.edge_id;
+}
+
+bool BucketsSinglePolygonIndex::Edge::compare2(const Edge & a, const Edge & b)
+{
+    /** comparing right point */
+    if (a.r.x() != b.r.x())
+    {
+        return a.r.x() < b.r.x();
+    }
+    if (a.r.y() != b.r.y())
+    {
+        return a.r.y() < b.r.y();
+    }
+
+    /** comparing left point */
+    if (a.l.x() != b.l.x())
+    {
+        return a.l.x() < b.l.x();
+    }
+    if (a.l.y() != b.l.y())
+    {
+        return a.l.y() < b.l.y();
+    }
+    /** equal end points: order by edge_id so that compare2(a, a) is false, as std::set requires */
+    return a.edge_id < b.edge_id;
+}
+
+bool BucketsSinglePolygonIndex::find(const Point & point) const
+{
+    /** Fewer than two distinct x coordinates: the index is empty. TODO: maybe we should check for vertical line? */
+    if (this->sorted_x.size() < 2)
+    {
+        return false;
+    }
+
+    Float64 x = point.x();
+    Float64 y = point.y();
+
+    if (x < this->sorted_x[0] || x > this->sorted_x.back())
+    {
+        return false;
+    }
+
+    /** The point is considered inside when a ray cast down from it crosses an odd number of edges.
+      * cnt below counts the examined edges that pass strictly under the point.
+      */
+
+    size_t pos = std::upper_bound(this->sorted_x.begin() + 1, this->sorted_x.end() - 1, x) - this->sorted_x.begin() - 1;
+
+    size_t cnt = 0;
+    /** pos += n: move from the index of the x range to its leaf in the segment tree */
+    pos += this->edges_index_tree.size() / 2;
+    do
+    {
+        /** iterating over interesting edges */
+        for (size_t edge_id : this->edges_index_tree[pos])
+        {
+            const auto & edge = this->all_edges[edge_id];
+
+            const Point & l = edge.l;
+            const Point & r = edge.r;
+
+            /** vertical edge: it cannot be crossed by the ray, only check whether the point lies on it */
+            if (l.x() == r.x())
+            {
+                if (l.x() == x && y >= l.y() && y <= r.y())
+                {
+                    return true;
+                }
+                continue;
+            }
+
+            /** check if point outside of edge's x bounds */
+            if (x < l.x() || x >= r.x())
+            {
+                continue;
+            }
+
+            Float64 edge_y = l.y() + (r.y() - l.y()) / (r.x() - l.x()) * (x - l.x());
+            if (edge_y > y)
+            {
+                continue;
+            }
+            if (edge_y == y)
+            {
+                return true;
+            }
+
+            ++cnt;
+        }
+        pos >>= 1;
+    } while (pos != 0);
+
+    return cnt % 2 == 1;
+}
+
 }
diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.h b/dbms/src/Dictionaries/PolygonDictionaryUtils.h
index ec08778e241..c58896a0236 100644
--- a/dbms/src/Dictionaries/PolygonDictionaryUtils.h
+++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.h
@@ -155,4 +155,70 @@ private:
     std::vector<std::vector<size_t>> edges_index_tree;
 };
 
+/** Index for point-in-polygon queries against a single polygon. During construction all
+  * polygon vertexes are sorted by x coordinate and every edge is attached to the range of
+  * adjacent x coordinates it spans. A query locates the range containing the point's x, iterates
+  * over the edges attached to it and counts the edges below the point: an odd count means "inside".
+  */
+class BucketsSinglePolygonIndex
+{
+public:
+    /** A two-dimensional point in Euclidean coordinates. */
+    using Point = IPolygonDictionary::Point;
+    /** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */
+    using Polygon = IPolygonDictionary::Polygon;
+    /** A ring in boost used for describing the polygons. */
+    using Ring = IPolygonDictionary::Ring;
+
+    /** Builds the index from the edges of the outer ring and of all inner rings of the polygon. */
+    BucketsSinglePolygonIndex(const Polygon & polygon);
+
+    /** Returns true if the point lies on an examined edge or has an odd number of edges strictly below it. */
+    bool find(const Point & point) const;
+
+private:
+    /** Returns sorted unique x coordinates among all points. Static: called from the constructor's initializer list. */
+    static std::vector<Float64> uniqueX(const Polygon & polygon);
+
+    /** Builds indexes described above. */
+    void indexBuild(const Polygon & polygon);
+
+    /** Auxiliary function for adding ring to index */
+    void indexAddRing(const Ring & ring);
+
+    /** Edge describes an edge (two adjacent points) of the polygon; edge_id is its position in all_edges.
+      * Invariant here is first point has x not greater than second point.
+      */
+    struct Edge
+    {
+        Point l;
+        Point r;
+        size_t edge_id;
+
+        static bool compare1(const Edge & a, const Edge & b);
+        static bool compare2(const Edge & a, const Edge & b);
+    };
+
+    /** Sorted distinct coordinates of all vertexes. */
+    std::vector<Float64> sorted_x;
+    std::vector<Edge> all_edges;
+
+    /** Conceptually, edges are classified by sorted_x borders:
+      * edges_index[i] would store all interesting edges in range [ sorted_x[i]; sorted_x[i + 1] ]
+      * That means edges_index.size() + 1 == sorted_x.size()
+      *
+      * std::vector<std::vector<size_t>> edges_index;
+      */
+
+    /** The conceptual edges_index is not stored; edges_index_tree holds the same information
+      * more compactly, as ids of edges in all_edges.
+      * To do that, edges_index_tree is actually a segment tree of segments between x coordinates.
+      * edges_index_tree.size() == edges_index.size() * 2 == n * 2, and as in usual segment tree,
+      * edges_index_tree[i] combines segments edges_index_tree[i*2] and edges_index_tree[i*2+1].
+      * Every polygon's edge covers a segment of x coordinates, and can be added to this tree by
+      * placing it into O(log n) vertexes of this tree.
+      */
+    std::vector<std::vector<size_t>> edges_index_tree;
+};
+
 }