diff --git a/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp b/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp index c85a1bf3ca2..7e5067238e6 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp +++ b/dbms/src/Dictionaries/PolygonDictionaryImplementations.cpp @@ -105,12 +105,10 @@ SmartPolygonDictionary::SmartPolygonDictionary( : IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_, input_type_, point_type_), grid(kMinIntersections, kMaxDepth, polygons) { - auto log = &Logger::get("BucketsPolygonIndex"); buckets.reserve(polygons.size()); for (size_t i = 0; i < polygons.size(); ++i) { - buckets.emplace_back(std::vector{polygons[i]}); - LOG_TRACE(log, "Finished polygon" << i); + buckets.emplace_back(polygons[i]); } } @@ -154,8 +152,7 @@ bool SmartPolygonDictionary::find(const Point & point, size_t & id) const for (size_t i = 0; i < (cell->polygon_ids).size(); ++i) { const auto & candidate = (cell->polygon_ids)[i]; - size_t unused = 0; - if ((cell->is_covered_by)[i] || buckets[candidate].find(point, unused)) + if ((cell->is_covered_by)[i] || buckets[candidate].find(point)) { found = true; id = candidate; diff --git a/dbms/src/Dictionaries/PolygonDictionaryImplementations.h b/dbms/src/Dictionaries/PolygonDictionaryImplementations.h index 8808581bcca..09828f18eaf 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryImplementations.h +++ b/dbms/src/Dictionaries/PolygonDictionaryImplementations.h @@ -76,7 +76,7 @@ public: private: bool find(const Point & point, size_t & id) const override; - std::vector buckets; + std::vector buckets; GridRoot grid; static constexpr size_t kMinIntersections = 1; static constexpr size_t kMaxDepth = 5; diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp index dccfef1eeb9..49b807fe7ba 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp +++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.cpp @@ 
-420,4 +420,246 @@ bool BucketsPolygonIndex::find(const Point & point, size_t & id) const return found; } +BucketsSinglePolygonIndex::BucketsSinglePolygonIndex( + const Polygon & polygon) + : sorted_x(uniqueX(polygon)) +{ + indexBuild(polygon); +} + +std::vector BucketsSinglePolygonIndex::uniqueX(const Polygon & polygon) +{ + std::vector all_x; + + for (auto & point : polygon.outer()) + { + all_x.push_back(point.x()); + } + + for (auto & inner : polygon.inners()) + { + for (auto & point : inner) + { + all_x.push_back(point.x()); + } + } + + /** Sort the collected x coordinates and drop duplicates. */ + std::sort(all_x.begin(), all_x.end()); + all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end()); + + return all_x; +} + +void BucketsSinglePolygonIndex::indexBuild(const Polygon & polygon) +{ + indexAddRing(polygon.outer()); + + for (auto & inner : polygon.inners()) + { + indexAddRing(inner); + } + + /** Sort edges with Edge::compare1 (left point first, then right point) and number them in that order. */ + std::sort(this->all_edges.begin(), this->all_edges.end(), Edge::compare1); + for (size_t i = 0; i != this->all_edges.size(); ++i) + { + this->all_edges[i].edge_id = i; + } + + /** Total number of edges. */ + size_t m = this->all_edges.size(); + + /** Set ordered by Edge::compare2 (right point first), so begin() is the edge with the smallest right point, like in scanline. */ + auto cmp = [](const Edge & a, const Edge & b) + { + return Edge::compare2(a, b); + }; + std::set interesting_edges(cmp); + + /** Size of the index: number of intervals between adjacent distinct x coordinates (sorted_x.size() - 1, or 0 if sorted_x is empty). */ + size_t n = 0; + if (!this->sorted_x.empty()) + { + n = this->sorted_x.size() - 1; + } + this->edges_index_tree.resize(2 * n); + + /** For every edge id: index of the first x interval the edge is stored for (edge_left) and of the first interval it is no longer stored for (edge_right); n means the value was never assigned. */ + std::vector edge_left(m, n), edge_right(m, n); + + size_t total_index_edges = 0; + size_t edges_it = 0; + for (size_t l = 0, r = 1; r < this->sorted_x.size(); ++l, ++r) + { + const Float64 lx = this->sorted_x[l]; + const Float64 rx = this->sorted_x[r]; + + /** removing edges 
where right_point.x < lx */ + while (!interesting_edges.empty() && interesting_edges.begin()->r.x() < lx) + { + edge_right[interesting_edges.begin()->edge_id] = l; + interesting_edges.erase(interesting_edges.begin()); + } + + /** adding edges where left_point.x <= rx */ + for (; edges_it < this->all_edges.size() && this->all_edges[edges_it].l.x() <= rx; ++edges_it) + { + interesting_edges.insert(this->all_edges[edges_it]); + edge_left[this->all_edges[edges_it].edge_id] = l; + } + } + + for (size_t i = 0; i != this->all_edges.size(); i++) + { + size_t l = edge_left[i]; + size_t r = edge_right[i]; + if (l == n) + { + continue; + } + + /** Adding the edge to the segment tree nodes that cover the intervals [l, r); leaves start at index n. */ + for (l += n, r += n; l < r; l >>= 1, r >>= 1) + { + if (l & 1) + { + this->edges_index_tree[l++].emplace_back(all_edges[i]); + ++total_index_edges; + } + if (r & 1) + { + this->edges_index_tree[--r].emplace_back(all_edges[i]); + ++total_index_edges; + } + } + } + +} + +void BucketsSinglePolygonIndex::indexAddRing(const Ring & ring) +{ + for (size_t i = 0, prev = ring.size() - 1; i < ring.size(); prev = i, ++i) + { + Point a = ring[prev]; + Point b = ring[i]; + + // making a.x <= b.x + if (a.x() > b.x()) + { + std::swap(a, b); + } + + if (a.x() == b.x() && a.y() > b.y()) + { + std::swap(a, b); + } + + this->all_edges.emplace_back(Edge{a, b, 0}); + } +} + +bool BucketsSinglePolygonIndex::Edge::compare1(const Edge & a, const Edge & b) +{ + /** comparing left point */ + if (a.l.x() != b.l.x()) + { + return a.l.x() < b.l.x(); + } + if (a.l.y() != b.l.y()) + { + return a.l.y() < b.l.y(); + } + + /** comparing right point */ + if (a.r.x() != b.r.x()) + { + return a.r.x() < b.r.x(); + } + return a.r.y() < b.r.y(); +} + +bool BucketsSinglePolygonIndex::Edge::compare2(const Edge & a, const Edge & b) +{ + /** comparing right point */ + if (a.r.x() != b.r.x()) + { + return a.r.x() < b.r.x(); + } + if (a.r.y() != b.r.y()) + { + return a.r.y() < b.r.y(); + } + + /** comparing left point */ + if (a.l.x() != 
b.l.x()) + { + return a.l.x() < b.l.x(); + } + return a.l.y() < b.l.y(); +} + +bool BucketsSinglePolygonIndex::find(const Point & point) const +{ + /** TODO: maybe we should check for vertical line? */ + if (this->sorted_x.size() < 2) + { + return false; + } + + Float64 x = point.x(); + Float64 y = point.y(); + + if (x < this->sorted_x[0] || x > this->sorted_x.back()) + { + return false; + } + + size_t pos = std::upper_bound(this->sorted_x.begin() + 1, this->sorted_x.end() - 1, x) - this->sorted_x.begin() - 1; + + size_t cnt = 0; + /** Move to the leaf of the found interval: pos += n, where n is half of the tree size. */ + pos += this->edges_index_tree.size() / 2; + do + { + /** Iterating over the edges stored in the current tree node; the loop then moves to the parent node. */ + for (const auto & edge : this->edges_index_tree[pos]) + { + const Point & l = edge.l; + const Point & r = edge.r; + + /** Vertical edge: report a hit if the point lies on it, otherwise it does not affect the crossing count. */ + if (l.x() == r.x()) + { + if (l.x() == x && y >= l.y() && y <= r.y()) + { + return true; + } + continue; + } + + /** Skip the edge if x is outside of the half-open range [l.x, r.x). */ + if (x < l.x() || x >= r.x()) + { + continue; + } + + Float64 edge_y = l.y() + (r.y() - l.y()) / (r.x() - l.x()) * (x - l.x()); + if (edge_y > y) + { + continue; + } + if (edge_y == y) + { + return true; + } + + ++cnt; + } + pos >>= 1; + } while (pos != 0); + + return cnt % 2 == 1; +} + } diff --git a/dbms/src/Dictionaries/PolygonDictionaryUtils.h b/dbms/src/Dictionaries/PolygonDictionaryUtils.h index 4bccc98eef9..ddc17c28894 100644 --- a/dbms/src/Dictionaries/PolygonDictionaryUtils.h +++ b/dbms/src/Dictionaries/PolygonDictionaryUtils.h @@ -155,4 +155,77 @@ private: std::vector> edges_index_tree; }; +/** Index over the edges of a single polygon, generated during its construction in + * the following way: sort all polygon's vertexes by x coordinate, and then store all interesting + * polygon edges for each pair of adjacent x coordinates. For each query finds interesting edges and + * iterates over them to decide whether the point belongs to the polygon. 
+ */ +class BucketsSinglePolygonIndex +{ +public: + /** A two-dimensional point in Euclidean coordinates. */ + using Point = IPolygonDictionary::Point; + /** A polygon in boost is an outer ring of points with zero or more cut out inner rings. */ + using Polygon = IPolygonDictionary::Polygon; + /** A ring in boost used for describing the polygons. */ + using Ring = IPolygonDictionary::Ring; + + /** Builds an index by splitting all edges with all points x coordinates. */ + BucketsSinglePolygonIndex(const Polygon & polygon); + + /** Returns true if the point lies on an edge of the polygon or an odd number of edges pass strictly below it. */ + bool find(const Point & point) const; + +private: + /** Returns sorted unique x coordinates among all points of the polygon. */ + std::vector uniqueX(const Polygon & polygon); + + /** Builds indexes described above. */ + void indexBuild(const Polygon & polygon); + + /** Auxiliary function appending all edges of the ring to all_edges. */ + void indexAddRing(const Ring & ring); + + /** Edge describes an edge (two adjacent points) of the polygon, and contains the id of the edge. + * Invariant here is first point has x not greater than second point. + */ + struct Edge + { + Point l; + Point r; + size_t edge_id; + + static bool compare1(const Edge & a, const Edge & b); + static bool compare2(const Edge & a, const Edge & b); + }; + + struct EdgeNoId + { + explicit EdgeNoId(const Edge & e): l(e.l), r(e.r) {} + Point l; + Point r; + }; + + /** Sorted distinct x coordinates of all vertexes. */ + std::vector sorted_x; + std::vector all_edges; + + /** Edges of the polygon, classified by sorted_x borders. + * edges_index[i] stores all interesting edges in range ( sorted_x[i]; sorted_x[i + 1] ] + * That means edges_index.size() + 1 == sorted_x.size() + * + * std::vector> edges_index; + */ + + /** TODO: fix this and previous comments. + * This edges_index_tree stores the same info as edges_index, but more efficiently. + * To do that, edges_index_tree is actually a segment tree of segments between x coordinates. 
+ * edges_index_tree.size() == edges_index.size() * 2 == n * 2, and as in usual segment tree, + * edges_index_tree[i] combines segments edges_index_tree[i*2] and edges_index_tree[i*2+1]. + * Every polygon's edge covers a segment of x coordinates, and can be added to this tree by + * placing it into O(log n) vertexes of this tree. + */ + std::vector> edges_index_tree; +}; + }