mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-14 10:22:10 +00:00
294 lines
11 KiB
C++
294 lines
11 KiB
C++
#pragma once
|
|
|
|
#include <base/types.h>
|
|
#include <Common/iota.h>
|
|
#include <Common/ThreadPool.h>
|
|
#include <Poco/Logger.h>
|
|
|
|
#include <boost/geometry.hpp>
|
|
#include <boost/geometry/geometries/box.hpp>
|
|
#include <boost/geometry/geometries/point_xy.hpp>
|
|
#include <boost/geometry/geometries/polygon.hpp>
|
|
|
|
#include "PolygonDictionary.h"
|
|
|
|
#include <numeric>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace bg = boost::geometry;
|
|
|
|
using Coord = IPolygonDictionary::Coord;
|
|
using Point = IPolygonDictionary::Point;
|
|
using Polygon = IPolygonDictionary::Polygon;
|
|
using Ring = IPolygonDictionary::Ring;
|
|
using Box = bg::model::box<IPolygonDictionary::Point>;
|
|
|
|
/** SlabsPolygonIndex builds index based on shooting ray down from point.
|
|
* When this ray crosses odd number of edges in single polygon, point is considered inside.
|
|
*
|
|
* SlabsPolygonIndex divides plane into vertical slabs, separated by vertical lines going through all points.
|
|
* For each slab, all edges falling in that slab are effectively stored.
|
|
* For each find query, required slab is found with binary search, and result is computed
|
|
* by iterating over all edges in that slab.
|
|
*/
|
|
class SlabsPolygonIndex
|
|
{
|
|
public:
|
|
SlabsPolygonIndex() = default;
|
|
|
|
/** Builds an index by splitting all edges with all points x coordinates. */
|
|
explicit SlabsPolygonIndex(const std::vector<Polygon> & polygons);
|
|
|
|
/** Finds polygon id the same way as IPolygonIndex. */
|
|
bool find(const Point & point, size_t & id) const;
|
|
|
|
/** Edge describes edge (adjacent points) of any polygon, and contains polygon's id.
|
|
* Invariant here is first point has x not greater than second point.
|
|
*/
|
|
struct Edge
|
|
{
|
|
Point l;
|
|
Point r;
|
|
size_t polygon_id;
|
|
size_t edge_id;
|
|
|
|
Coord k;
|
|
Coord b;
|
|
|
|
Edge(const Point & l, const Point & r, size_t polygon_id, size_t edge_id);
|
|
|
|
static bool compareByLeftPoint(const Edge & a, const Edge & b);
|
|
static bool compareByRightPoint(const Edge & a, const Edge & b);
|
|
};
|
|
|
|
/** EdgeLine is optimized version of Edge. */
|
|
struct EdgeLine
|
|
{
|
|
explicit EdgeLine(const Edge & e): k(e.k), b(e.b), polygon_id(e.polygon_id) {}
|
|
Coord k;
|
|
Coord b;
|
|
size_t polygon_id;
|
|
};
|
|
|
|
private:
|
|
/** Returns unique x coordinates among all points */
|
|
static std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);
|
|
|
|
/** Builds index described above */
|
|
void indexBuild(const std::vector<Polygon> & polygons);
|
|
|
|
/** Auxiliary function for adding ring to the index */
|
|
void indexAddRing(const Ring & ring, size_t polygon_id);
|
|
|
|
Poco::Logger * log;
|
|
|
|
/** Sorted distinct coordinates of all vertices */
|
|
std::vector<Coord> sorted_x;
|
|
std::vector<Edge> all_edges;
|
|
|
|
/** This edges_index_tree stores all slabs with edges efficiently, using segment tree algorithm.
|
|
* edges_index_tree[i] node combines segments from edges_index_tree[i*2] and edges_index_tree[i*2+1].
|
|
* Every polygon's edge covers a segment of x coordinates, and can be added to this tree by
|
|
* placing it into O(log n) nodes of this tree.
|
|
*/
|
|
std::vector<std::vector<EdgeLine>> edges_index_tree;
|
|
};
|
|
|
|
template <class ReturnCell>
|
|
class ICell
|
|
{
|
|
public:
|
|
virtual ~ICell() = default;
|
|
[[nodiscard]] virtual const ReturnCell * find(Coord x, Coord y) const = 0;
|
|
};
|
|
|
|
/** This leaf cell implementation simply stores the indexes of the intersections.
|
|
* As an additional optimization, if a polygon covers the cell completely its index is stored in
|
|
* the first_covered field and all following polygon indexes are discarded,
|
|
* since they won't ever be useful.
|
|
*/
|
|
class FinalCell : public ICell<FinalCell>
|
|
{
|
|
public:
|
|
explicit FinalCell(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> &, const Box &, bool is_last_covered_);
|
|
std::vector<size_t> polygon_ids;
|
|
size_t first_covered = kNone;
|
|
|
|
static constexpr size_t kNone = -1;
|
|
|
|
private:
|
|
[[nodiscard]] const FinalCell * find(Coord x, Coord y) const override;
|
|
};
|
|
|
|
/** This leaf cell implementation intersects the given polygons with the cell's box and builds a
|
|
* slab index for the result.
|
|
* Since the intersections can produce multiple polygons a vector of corresponding ids is stored.
|
|
* If the slab index returned the id x for a query the correct polygon id is corresponding_ids[x].
|
|
* As an additional optimization, if a polygon covers the cell completely its index stored in the
|
|
* first_covered field and all following polygons are not used for building the slab index.
|
|
*/
|
|
class FinalCellWithSlabs : public ICell<FinalCellWithSlabs>
|
|
{
|
|
public:
|
|
explicit FinalCellWithSlabs(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> & polygons_, const Box & box_, bool is_last_covered_);
|
|
|
|
SlabsPolygonIndex index;
|
|
std::vector<size_t> corresponding_ids;
|
|
size_t first_covered = kNone;
|
|
|
|
static constexpr size_t kNone = -1;
|
|
|
|
private:
|
|
[[nodiscard]] const FinalCellWithSlabs * find(Coord x, Coord y) const override;
|
|
};
|
|
|
|
template <class ReturnCell>
|
|
class DividedCell : public ICell<ReturnCell>
|
|
{
|
|
public:
|
|
explicit DividedCell(std::vector<std::unique_ptr<ICell<ReturnCell>>> children_): children(std::move(children_)) {}
|
|
|
|
[[nodiscard]] const ReturnCell * find(Coord x, Coord y) const override
|
|
{
|
|
auto x_ratio = x * kSplit;
|
|
auto y_ratio = y * kSplit;
|
|
auto x_bin = static_cast<int>(x_ratio);
|
|
auto y_bin = static_cast<int>(y_ratio);
|
|
return children[y_bin + x_bin * kSplit]->find(x_ratio - x_bin, y_ratio - y_bin);
|
|
}
|
|
|
|
/** When a cell is split every side is split into kSplit pieces producing kSplit * kSplit equal smaller cells. */
|
|
static constexpr size_t kSplit = 4;
|
|
|
|
private:
|
|
std::vector<std::unique_ptr<ICell<ReturnCell>>> children;
|
|
};
|
|
|
|
/** A recursively built grid containing information about polygons intersecting each cell.
|
|
* The starting cell is the bounding box of the given polygons which are passed by reference.
|
|
* For every cell a vector of indices of intersecting polygons is calculated, in the order originally provided upon
|
|
* construction. A cell is recursively split into kSplit * kSplit equal cells up to the point where the cell
|
|
* intersects a small enough number of polygons or the maximum allowed depth is exceeded.
|
|
* Both of these parameters are set in the constructor.
|
|
* Once these conditions are fulfilled some index is built and stored in the leaf cells.
|
|
* The ReturnCell class passed in the template parameter is responsible for this.
|
|
*/
|
|
template <class ReturnCell>
|
|
class GridRoot : public ICell<ReturnCell>
|
|
{
|
|
public:
|
|
GridRoot(size_t min_intersections_, size_t max_depth_, const std::vector<Polygon> & polygons_):
|
|
k_min_intersections(min_intersections_), k_max_depth(max_depth_), polygons(polygons_)
|
|
{
|
|
setBoundingBox();
|
|
std::vector<size_t> order(polygons.size());
|
|
iota(order.data(), order.size(), size_t(0));
|
|
root = makeCell(min_x, min_y, max_x, max_y, order);
|
|
}
|
|
|
|
/** Retrieves the cell containing a given point.
|
|
* A null pointer is returned when the point falls outside the grid.
|
|
*/
|
|
[[nodiscard]] const ReturnCell * find(Coord x, Coord y) const override
|
|
{
|
|
if (x < min_x || x >= max_x)
|
|
return nullptr;
|
|
if (y < min_y || y >= max_y)
|
|
return nullptr;
|
|
return root->find((x - min_x) / (max_x - min_x), (y - min_y) / (max_y - min_y));
|
|
}
|
|
|
|
/** Until this depth is reached each row of cells is calculated concurrently in a new thread. */
|
|
static constexpr size_t kMultiProcessingDepth = 2;
|
|
|
|
/** A constant used to avoid errors with points falling on the boundaries of cells. */
|
|
static constexpr Coord kEps = 1e-4f;
|
|
|
|
private:
|
|
std::unique_ptr<ICell<ReturnCell>> root = nullptr;
|
|
Coord min_x = 0, min_y = 0;
|
|
Coord max_x = 0, max_y = 0;
|
|
const size_t k_min_intersections;
|
|
const size_t k_max_depth;
|
|
|
|
const std::vector<Polygon> & polygons;
|
|
|
|
std::unique_ptr<ICell<ReturnCell>> makeCell(Coord current_min_x, Coord current_min_y, Coord current_max_x, Coord current_max_y, std::vector<size_t> possible_ids, size_t depth = 0)
|
|
{
|
|
auto current_box = Box(Point(current_min_x, current_min_y), Point(current_max_x, current_max_y));
|
|
Polygon tmp_poly;
|
|
bg::convert(current_box, tmp_poly);
|
|
std::erase_if(possible_ids, [&](const auto id)
|
|
{
|
|
return !bg::intersects(current_box, polygons[id]);
|
|
});
|
|
int covered = 0;
|
|
#ifndef __clang_analyzer__ /// Triggers a warning in boost geometry.
|
|
auto it = std::find_if(possible_ids.begin(), possible_ids.end(), [&](const auto id)
|
|
{
|
|
return bg::covered_by(tmp_poly, polygons[id]);
|
|
});
|
|
if (it != possible_ids.end())
|
|
{
|
|
possible_ids.erase(it + 1, possible_ids.end());
|
|
covered = 1;
|
|
}
|
|
#endif
|
|
size_t intersections = possible_ids.size() - covered;
|
|
if (intersections <= k_min_intersections || depth++ == k_max_depth)
|
|
return std::make_unique<ReturnCell>(possible_ids, polygons, current_box, covered);
|
|
auto x_shift = (current_max_x - current_min_x) / DividedCell<ReturnCell>::kSplit;
|
|
auto y_shift = (current_max_y - current_min_y) / DividedCell<ReturnCell>::kSplit;
|
|
std::vector<std::unique_ptr<ICell<ReturnCell>>> children;
|
|
children.resize(DividedCell<ReturnCell>::kSplit * DividedCell<ReturnCell>::kSplit);
|
|
std::vector<ThreadFromGlobalPool> threads{};
|
|
for (size_t i = 0; i < DividedCell<ReturnCell>::kSplit; current_min_x += x_shift, ++i)
|
|
{
|
|
auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i](Coord x, Coord y)
|
|
{
|
|
for (size_t j = 0; j < DividedCell<ReturnCell>::kSplit; y += y_shift, ++j)
|
|
{
|
|
children[i * DividedCell<ReturnCell>::kSplit + j] = makeCell(x, y, x + x_shift, y + y_shift, possible_ids, depth);
|
|
}
|
|
};
|
|
if (depth <= kMultiProcessingDepth)
|
|
threads.emplace_back(handle_row, current_min_x, current_min_y);
|
|
else
|
|
handle_row(current_min_x, current_min_y);
|
|
}
|
|
for (auto & thread : threads)
|
|
thread.join();
|
|
return std::make_unique<DividedCell<ReturnCell>>(std::move(children));
|
|
}
|
|
|
|
void setBoundingBox()
|
|
{
|
|
bool first = true;
|
|
std::for_each(polygons.begin(), polygons.end(), [&](const auto & polygon)
|
|
{
|
|
bg::for_each_point(polygon, [&](const Point & point)
|
|
{
|
|
auto x = point.x();
|
|
auto y = point.y();
|
|
if (first || x < min_x)
|
|
min_x = x;
|
|
if (first || x > max_x)
|
|
max_x = x;
|
|
if (first || y < min_y)
|
|
min_y = y;
|
|
if (first || y > max_y)
|
|
max_y = y;
|
|
if (first)
|
|
first = false;
|
|
});
|
|
});
|
|
max_x += kEps;
|
|
max_y += kEps;
|
|
}
|
|
};
|
|
|
|
}
|