ClickHouse/src/Functions/geometryConverters.h

524 lines
14 KiB
C++
Raw Normal View History

2020-06-07 13:42:09 +00:00
#pragma once
2020-06-07 12:33:49 +00:00
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Types.h>
#include <boost/variant.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
2020-06-07 14:28:46 +00:00
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/IDataType.h>
2020-06-07 16:47:56 +00:00
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
2020-06-07 14:28:46 +00:00
#include <common/logger_useful.h>
2021-01-19 17:16:10 +00:00
namespace DB
{
2020-06-07 12:33:49 +00:00
2020-06-07 14:28:46 +00:00
namespace ErrorCodes
{
2020-06-14 17:04:10 +00:00
extern const int BAD_ARGUMENTS;
2020-06-07 14:28:46 +00:00
}
2020-06-07 12:33:49 +00:00
2021-01-18 23:51:34 +00:00
namespace bg = boost::geometry;
2021-01-19 14:52:53 +00:00
/// Ring of points of type `Point` (boost::geometry::model::ring).
template <typename Point>
using Ring = bg::model::ring<Point>;
/// Polygon over `Point` (boost::geometry::model::polygon): an outer ring plus inner rings.
template <typename Point>
using Polygon = bg::model::polygon<Point>;
/// Collection of polygons over `Point` (boost::geometry::model::multi_polygon).
template <typename Point>
using MultiPolygon = bg::model::multi_polygon<Polygon<Point>>;
/// Variant able to hold any one of the four geometry kinds above for a given `Point` type.
template <typename Point>
using Geometry = boost::variant<Point, Ring<Point>, Polygon<Point>, MultiPolygon<Point>>;
2021-01-18 23:51:34 +00:00
using CartesianPoint = bg::model::d2::point_xy<Float64>;
2021-01-19 14:52:53 +00:00
using CartesianRing = Ring<CartesianPoint>;
using CartesianPolygon = Polygon<CartesianPoint>;
using CartesianMultiPolygon = MultiPolygon<CartesianPoint>;
using CartesianGeometry = Geometry<CartesianPoint>;
2021-01-18 23:51:34 +00:00
using GeographicPoint = bg::model::point<Float64, 2, bg::cs::geographic<bg::degree>>;
2021-01-19 14:52:53 +00:00
using GeographicRing = Ring<GeographicPoint>;
using GeographicPolygon = Polygon<GeographicPoint>;
using GeographicMultiPolygon = MultiPolygon<GeographicPoint>;
using GeographicGeometry = Geometry<GeographicPoint>;
2021-01-18 23:51:34 +00:00
/**
2021-01-19 17:16:10 +00:00
* Class which takes some boost type and returns a pair of numbers.
* They are (x,y) in case of cartesian coordinated and (lon,lat) in case of geographic.
2021-01-18 23:51:34 +00:00
*/
template <typename PointType>
class PointFromColumnParser
2020-06-07 13:42:09 +00:00
{
public:
2021-01-18 23:51:34 +00:00
PointFromColumnParser(ColumnPtr col_) : col(col_)
2020-06-07 14:28:46 +00:00
{
2020-06-14 17:04:10 +00:00
const auto & tuple = static_cast<const ColumnTuple &>(*col_);
const auto & tuple_columns = tuple.getColumns();
2020-06-07 14:28:46 +00:00
2021-01-18 23:51:34 +00:00
#ifndef NDEBUG
size = tuple.size();
#endif
2020-06-14 17:04:10 +00:00
const auto & x_data = static_cast<const ColumnFloat64 &>(*tuple_columns[0]);
2021-01-18 23:51:34 +00:00
first = x_data.getData().data();
2020-06-07 14:28:46 +00:00
2020-06-14 17:04:10 +00:00
const auto & y_data = static_cast<const ColumnFloat64 &>(*tuple_columns[1]);
2021-01-18 23:51:34 +00:00
second = y_data.getData().data();
2020-06-07 14:28:46 +00:00
}
2021-01-18 23:51:34 +00:00
template<class Q = PointType>
typename std::enable_if_t<std::is_same_v<Q, CartesianPoint>, CartesianGeometry> createContainer() const
{
return CartesianPoint();
}
template<class Q = PointType>
typename std::enable_if_t<std::is_same_v<Q, GeographicPoint>, GeographicGeometry> createContainer() const
{
return GeographicPoint();
}
2020-06-14 17:04:10 +00:00
2021-01-18 23:51:34 +00:00
template<class Q = PointType>
void get(std::enable_if_t<std::is_same_v<Q, CartesianPoint>, CartesianGeometry> & container, size_t i) const
2020-06-07 14:28:46 +00:00
{
2021-01-18 23:51:34 +00:00
#ifndef NDEBUG
assert(i < size);
#endif
get(boost::get<PointType>(container), i);
2020-06-07 14:28:46 +00:00
}
2021-01-18 23:51:34 +00:00
template<class Q = PointType>
void get(std::enable_if_t<std::is_same_v<Q, GeographicPoint>, GeographicGeometry> & container, size_t i) const
2020-06-07 16:04:35 +00:00
{
2021-01-18 23:51:34 +00:00
#ifndef NDEBUG
assert(i < size);
#endif
get(boost::get<PointType>(container), i);
2020-06-07 16:04:35 +00:00
}
2021-01-18 23:51:34 +00:00
void get(PointType & container, size_t i) const
2020-06-07 14:28:46 +00:00
{
2021-01-18 23:51:34 +00:00
#ifndef NDEBUG
assert(i < size);
#endif
boost::geometry::set<0>(container, first[i]);
boost::geometry::set<1>(container, second[i]);
2020-06-07 14:28:46 +00:00
}
2021-01-18 23:51:34 +00:00
2020-06-07 13:42:09 +00:00
private:
2021-01-18 23:51:34 +00:00
/// Note, this is needed to prevent use-after-free.
2020-06-14 17:04:10 +00:00
ColumnPtr col;
2021-01-18 23:51:34 +00:00
#ifndef NDEBUG
size_t size;
#endif
const Float64 * first;
const Float64 * second;
2020-06-07 13:42:09 +00:00
};
2021-01-19 14:52:53 +00:00
template<class Point>
2020-06-07 14:28:46 +00:00
class RingFromColumnParser
{
public:
2020-06-14 17:04:10 +00:00
RingFromColumnParser(ColumnPtr col_)
: offsets(static_cast<const ColumnArray &>(*col_).getOffsets())
2021-01-19 14:52:53 +00:00
, point_parser(static_cast<const ColumnArray &>(*col_).getDataPtr())
2020-06-07 18:26:18 +00:00
{
}
2020-06-07 14:28:46 +00:00
2021-01-19 14:52:53 +00:00
Geometry<Point> createContainer() const
2020-06-07 14:28:46 +00:00
{
2021-01-19 14:52:53 +00:00
return Ring<Point>();
2020-06-07 14:28:46 +00:00
}
2021-01-19 14:52:53 +00:00
void get(Geometry<Point> & container, size_t i) const
2020-06-07 16:04:35 +00:00
{
2021-01-19 14:52:53 +00:00
get(boost::get<Ring<Point>>(container), i);
2020-06-07 16:04:35 +00:00
}
2021-01-19 14:52:53 +00:00
void get(Ring<Point> & container, size_t i) const
2020-06-07 14:28:46 +00:00
{
size_t l = offsets[i - 1];
size_t r = offsets[i];
2020-06-07 14:58:34 +00:00
// reserve extra point for case when polygon is open
container.reserve(r - l + 1);
2020-06-07 14:28:46 +00:00
container.resize(r - l);
2021-01-19 17:16:10 +00:00
for (size_t j = l; j < r; j++)
2021-01-19 14:52:53 +00:00
point_parser.get(container[j - l], j);
2020-06-07 14:58:34 +00:00
// make ring closed
if (!boost::geometry::equals(container[0], container.back()))
{
container.push_back(container[0]);
2020-06-07 14:28:46 +00:00
}
}
private:
const IColumn::Offsets & offsets;
2021-01-19 14:52:53 +00:00
const PointFromColumnParser<Point> point_parser;
2020-06-07 14:28:46 +00:00
};
2021-01-19 14:52:53 +00:00
template<class Point>
2020-06-07 14:58:34 +00:00
class PolygonFromColumnParser
{
public:
2020-06-14 17:04:10 +00:00
PolygonFromColumnParser(ColumnPtr col_)
: offsets(static_cast<const ColumnArray &>(*col_).getOffsets())
2021-01-19 14:52:53 +00:00
, ring_parser(static_cast<const ColumnArray &>(*col_).getDataPtr())
2020-06-07 14:58:34 +00:00
{}
2021-01-19 14:52:53 +00:00
Geometry<Point> createContainer() const
2020-06-07 14:58:34 +00:00
{
2021-01-19 14:52:53 +00:00
return Polygon<Point>();
2020-06-07 14:58:34 +00:00
}
2021-01-19 14:52:53 +00:00
void get(Geometry<Point> & container, size_t i) const
2020-06-07 16:04:35 +00:00
{
2021-01-19 14:52:53 +00:00
get(boost::get<Polygon<Point>>(container), i);
2020-06-07 16:04:35 +00:00
}
2021-01-19 14:52:53 +00:00
void get(Polygon<Point> & container, size_t i) const
2020-06-07 14:58:34 +00:00
{
size_t l = offsets[i - 1];
size_t r = offsets[i];
2021-01-19 14:52:53 +00:00
ring_parser.get(container.outer(), l);
2020-06-07 14:58:34 +00:00
container.inners().resize(r - l - 1);
for (size_t j = l + 1; j < r; j++)
{
2021-01-19 14:52:53 +00:00
ring_parser.get(container.inners()[j - l - 1], j);
2020-06-07 14:58:34 +00:00
}
}
private:
const IColumn::Offsets & offsets;
2021-01-19 14:52:53 +00:00
const RingFromColumnParser<Point> ring_parser;
2020-06-07 14:58:34 +00:00
};
2021-01-19 14:52:53 +00:00
template<class Point>
2020-06-07 14:58:34 +00:00
class MultiPolygonFromColumnParser
{
public:
2020-06-14 17:04:10 +00:00
MultiPolygonFromColumnParser(ColumnPtr col_)
: offsets(static_cast<const ColumnArray &>(*col_).getOffsets())
2021-01-19 14:52:53 +00:00
, polygon_parser(static_cast<const ColumnArray &>(*col_).getDataPtr())
2020-06-07 14:58:34 +00:00
{}
2021-01-19 14:52:53 +00:00
Geometry<Point> createContainer() const
2020-06-07 14:58:34 +00:00
{
2021-01-19 14:52:53 +00:00
return MultiPolygon<Point>();
2020-06-07 14:58:34 +00:00
}
2021-01-19 14:52:53 +00:00
void get(Geometry<Point> & container, size_t i) const
2020-06-07 14:58:34 +00:00
{
2021-01-19 14:52:53 +00:00
auto & multi_polygon = boost::get<MultiPolygon<Point>>(container);
2020-06-07 14:58:34 +00:00
size_t l = offsets[i - 1];
size_t r = offsets[i];
2020-06-07 16:04:35 +00:00
multi_polygon.resize(r - l);
2020-06-07 14:58:34 +00:00
for (size_t j = l; j < r; j++)
{
2021-01-19 14:52:53 +00:00
polygon_parser.get(multi_polygon[j - l], j);
2020-06-07 14:58:34 +00:00
}
}
private:
const IColumn::Offsets & offsets;
2021-01-19 14:52:53 +00:00
const PolygonFromColumnParser<Point> polygon_parser;
2020-06-07 14:58:34 +00:00
};
2021-01-19 14:52:53 +00:00
/// Variant holding exactly one of the four column parsers for a given `Point` type.
template <typename Point>
using GeometryFromColumnParser = boost::variant<
    PointFromColumnParser<Point>,
    RingFromColumnParser<Point>,
    PolygonFromColumnParser<Point>,
    MultiPolygonFromColumnParser<Point>
>;
2020-06-07 13:42:09 +00:00
2021-01-19 14:52:53 +00:00
/// Returns a geometry container for the given parser variant.
/// Only declared here; presumably it forwards to createContainer() of the parser held by the variant - see the definition.
template <typename Point>
Geometry<Point> createContainer(const GeometryFromColumnParser<Point> & parser);
2021-01-18 23:51:34 +00:00
2021-01-19 14:52:53 +00:00
/// Explicit instantiation declarations: the Cartesian and Geographic versions are not instantiated
/// implicitly by includers of this header; their instantiations are expected in another translation unit.
extern template Geometry<CartesianPoint> createContainer(const GeometryFromColumnParser<CartesianPoint> & parser);
extern template Geometry<GeographicPoint> createContainer(const GeometryFromColumnParser<GeographicPoint> & parser);
2021-01-18 23:51:34 +00:00
2021-01-19 14:52:53 +00:00
/// Reads row `i` through the given parser variant into `container`.
/// Only declared here; presumably it forwards to get() of the parser held by the variant - see the definition.
template <typename Point>
void get(const GeometryFromColumnParser<Point> & parser, Geometry<Point> & container, size_t i);
2021-01-18 23:51:34 +00:00
2021-01-19 14:52:53 +00:00
/// Explicit instantiation declarations: the Cartesian and Geographic versions are not instantiated
/// implicitly by includers of this header; their instantiations are expected in another translation unit.
extern template void get(const GeometryFromColumnParser<CartesianPoint> & parser, Geometry<CartesianPoint> & container, size_t i);
extern template void get(const GeometryFromColumnParser<GeographicPoint> & parser, Geometry<GeographicPoint> & container, size_t i);
2021-01-18 23:51:34 +00:00
2021-01-19 14:52:53 +00:00
/// Builds a parser variant for the given column.
/// Only declared here; presumably the alternative is chosen from the type of `col` - see the definition.
template <typename Point>
GeometryFromColumnParser<Point> makeGeometryFromColumnParser(const ColumnWithTypeAndName & col);
2021-01-18 23:51:34 +00:00
2021-01-19 14:52:53 +00:00
/// Explicit instantiation declarations: the Cartesian and Geographic versions are not instantiated
/// implicitly by includers of this header; their instantiations are expected in another translation unit.
extern template GeometryFromColumnParser<CartesianPoint> makeGeometryFromColumnParser(const ColumnWithTypeAndName & col);
extern template GeometryFromColumnParser<GeographicPoint> makeGeometryFromColumnParser(const ColumnWithTypeAndName & col);
2020-06-07 16:04:35 +00:00
2021-01-19 14:52:53 +00:00
/// To serialize Geographic or Cartesian point (a pair of numbers in both cases).
/// Every visited geometry appends exactly one point to a pair of Float64 columns;
/// geometries that cannot be reduced to a single point are rejected with BAD_ARGUMENTS.
template <typename Point>
class PointSerializerVisitor : public boost::static_visitor<void>
{
public:
    PointSerializerVisitor()
        : first(ColumnFloat64::create())
        , second(ColumnFloat64::create())
    {}

    /// NOTE(review): presumably ColumnFloat64::create(n) makes columns that already contain n elements,
    /// in which case the values inserted by the visitor are appended after them - confirm this is intended.
    PointSerializerVisitor(size_t n)
        : first(ColumnFloat64::create(n))
        , second(ColumnFloat64::create(n))
    {}

    /// Appends coordinate 0 to the first column and coordinate 1 to the second one.
    void operator()(const Point & point)
    {
        first->insertValue(point.template get<0>());
        second->insertValue(point.template get<1>());
    }

    /// A ring is accepted only if it consists of exactly one point.
    void operator()(const Ring<Point> & ring)
    {
        if (ring.size() != 1)
            throw Exception("Unable to write ring of size " + toString(ring.size()) + " != 1 to point column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(ring[0]);
    }

    /// A polygon is accepted only if it has no inner rings; its outer ring is then handled as a ring.
    void operator()(const Polygon<Point> & polygon)
    {
        if (!polygon.inners().empty())
            throw Exception("Unable to write polygon with holes to point column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(polygon.outer());
    }

    /// A multi-polygon is accepted only if it consists of exactly one polygon.
    void operator()(const MultiPolygon<Point> & multi_polygon)
    {
        if (multi_polygon.size() != 1)
            throw Exception("Unable to write multi-polygon of size " + toString(multi_polygon.size()) + " != 1 to point column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(multi_polygon[0]);
    }

    /// Moves both coordinate columns into a tuple column and returns it.
    /// The visitor's columns are moved-from afterwards, so it must not be used for further inserts.
    ColumnPtr finalize()
    {
        Columns columns(2);
        columns[0] = std::move(first);
        columns[1] = std::move(second);

        return ColumnTuple::create(columns);
    }

private:
    ColumnFloat64::MutablePtr first;
    ColumnFloat64::MutablePtr second;
};
2021-01-19 14:52:53 +00:00
/// Serializes geometries into an array column of points.
/// Every visited geometry becomes one array row: its points go to the nested point serializer
/// and the running total of points is appended to `offsets`.
template <typename Point>
class RingSerializerVisitor : public boost::static_visitor<void>
{
public:
    RingSerializerVisitor()
        : offsets(ColumnUInt64::create())
    {}

    /// NOTE(review): presumably ColumnUInt64::create(n) makes a column that already contains n elements,
    /// in which case the offsets inserted by the visitor are appended after them - confirm this is intended.
    RingSerializerVisitor(size_t n)
        : offsets(ColumnUInt64::create(n))
    {}

    /// A single point is written as a row with one element.
    void operator()(const Point & point)
    {
        size++;
        offsets->insertValue(size);

        pointSerializer(point);
    }

    /// Writes all points of the ring as one row.
    void operator()(const Ring<Point> & ring)
    {
        size += ring.size();
        offsets->insertValue(size);

        for (const auto & point : ring)
            pointSerializer(point);
    }

    /// A polygon is accepted only if it has no inner rings; its outer ring is then written as a ring.
    void operator()(const Polygon<Point> & polygon)
    {
        if (!polygon.inners().empty())
            throw Exception("Unable to write polygon with holes to ring column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(polygon.outer());
    }

    /// A multi-polygon is accepted only if it consists of exactly one polygon.
    void operator()(const MultiPolygon<Point> & multi_polygon)
    {
        if (multi_polygon.size() != 1)
            throw Exception("Unable to write multi-polygon of size " + toString(multi_polygon.size()) + " != 1 to ring column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(multi_polygon[0]);
    }

    /// Builds the array column from the finalized point column and the collected offsets.
    /// `offsets` is moved-from afterwards, so the visitor must not be used for further inserts.
    ColumnPtr finalize()
    {
        return ColumnArray::create(pointSerializer.finalize(), std::move(offsets));
    }

private:
    /// Total number of points passed to `pointSerializer` so far.
    size_t size = 0;
    PointSerializerVisitor<Point> pointSerializer;
    ColumnUInt64::MutablePtr offsets;
};
2021-01-19 14:52:53 +00:00
/// Serializes geometries into an array column of rings.
/// Every visited geometry becomes one array row: its rings go to the nested ring serializer
/// and the running total of rings is appended to `offsets`.
template <typename Point>
class PolygonSerializerVisitor : public boost::static_visitor<void>
{
public:
    PolygonSerializerVisitor()
        : offsets(ColumnUInt64::create())
    {}

    /// NOTE(review): presumably ColumnUInt64::create(n) makes a column that already contains n elements,
    /// in which case the offsets inserted by the visitor are appended after them - confirm this is intended.
    PolygonSerializerVisitor(size_t n)
        : offsets(ColumnUInt64::create(n))
    {}

    /// A single point is written as a row with one ring (the ring serializer decides how to store the point).
    void operator()(const Point & point)
    {
        size++;
        offsets->insertValue(size);

        ringSerializer(point);
    }

    /// A ring is written as a row with one ring.
    void operator()(const Ring<Point> & ring)
    {
        size++;
        offsets->insertValue(size);

        ringSerializer(ring);
    }

    /// Writes the outer ring first and then every inner ring, all into the same row.
    void operator()(const Polygon<Point> & polygon)
    {
        size += 1 + polygon.inners().size();
        offsets->insertValue(size);

        ringSerializer(polygon.outer());
        for (const auto & ring : polygon.inners())
            ringSerializer(ring);
    }

    /// A multi-polygon is accepted only if it consists of exactly one polygon.
    void operator()(const MultiPolygon<Point> & multi_polygon)
    {
        if (multi_polygon.size() != 1)
            throw Exception("Unable to write multi-polygon of size " + toString(multi_polygon.size()) + " != 1 to polygon column", ErrorCodes::BAD_ARGUMENTS);

        (*this)(multi_polygon[0]);
    }

    /// Builds the array column from the finalized ring column and the collected offsets.
    /// `offsets` is moved-from afterwards, so the visitor must not be used for further inserts.
    ColumnPtr finalize()
    {
        return ColumnArray::create(ringSerializer.finalize(), std::move(offsets));
    }

private:
    /// Total number of rings passed to `ringSerializer` so far.
    size_t size = 0;
    RingSerializerVisitor<Point> ringSerializer;
    ColumnUInt64::MutablePtr offsets;
};
2021-01-19 14:52:53 +00:00
/// Serializes geometries into an array column of polygons.
/// Every visited geometry becomes one array row: its polygons go to the nested polygon serializer
/// and the running total of polygons is appended to `offsets`.
template <typename Point>
class MultiPolygonSerializerVisitor : public boost::static_visitor<void>
{
public:
    MultiPolygonSerializerVisitor()
        : offsets(ColumnUInt64::create())
    {}

    /// NOTE(review): presumably ColumnUInt64::create(n) makes a column that already contains n elements,
    /// in which case the offsets inserted by the visitor are appended after them - confirm this is intended.
    MultiPolygonSerializerVisitor(size_t n)
        : offsets(ColumnUInt64::create(n))
    {}

    /// A single point is written as a row with one polygon (the polygon serializer decides how to store it).
    void operator()(const Point & point)
    {
        size++;
        offsets->insertValue(size);

        polygonSerializer(point);
    }

    /// A ring is written as a row with one polygon.
    void operator()(const Ring<Point> & ring)
    {
        size++;
        offsets->insertValue(size);

        polygonSerializer(ring);
    }

    /// A polygon is written as a row with one polygon.
    void operator()(const Polygon<Point> & polygon)
    {
        size++;
        offsets->insertValue(size);

        polygonSerializer(polygon);
    }

    /// Writes every polygon of the multi-polygon into the same row.
    void operator()(const MultiPolygon<Point> & multi_polygon)
    {
        size += multi_polygon.size();
        offsets->insertValue(size);

        for (const auto & polygon : multi_polygon)
            polygonSerializer(polygon);
    }

    /// Builds the array column from the finalized polygon column and the collected offsets.
    /// `offsets` is moved-from afterwards, so the visitor must not be used for further inserts.
    ColumnPtr finalize()
    {
        return ColumnArray::create(polygonSerializer.finalize(), std::move(offsets));
    }

private:
    /// Total number of polygons passed to `polygonSerializer` so far.
    size_t size = 0;
    PolygonSerializerVisitor<Point> polygonSerializer;
    ColumnUInt64::MutablePtr offsets;
};
2020-06-14 17:04:10 +00:00
/// Thin wrapper that feeds geometry variants to a serializing visitor and returns the column it has built.
/// `Visitor` must be default-constructible, callable through boost::apply_visitor on `Geometry`
/// and provide finalize().
/// Inside this class the name `Geometry` refers to the template parameter, not to the alias template of the same name.
template <class Geometry, class Visitor>
class GeometrySerializer
{
public:
    /// Dispatches on the alternative currently held by `geometry` and lets the visitor append it.
    void add(const Geometry & geometry)
    {
        boost::apply_visitor(visitor, geometry);
    }

    /// Returns whatever the visitor's finalize() returns.
    ColumnPtr finalize()
    {
        return visitor.finalize();
    }

private:
    Visitor visitor;
};
2020-06-14 17:04:10 +00:00
2021-01-19 14:52:53 +00:00
/// Ready-to-use serializers: each one pairs the Geometry<Point> variant with the visitor producing the corresponding column.
/// Serializer backed by PointSerializerVisitor.
template <typename Point>
using PointSerializer = GeometrySerializer<Geometry<Point>, PointSerializerVisitor<Point>>;
/// Serializer backed by RingSerializerVisitor.
template <typename Point>
using RingSerializer = GeometrySerializer<Geometry<Point>, RingSerializerVisitor<Point>>;
/// Serializer backed by PolygonSerializerVisitor.
template <typename Point>
using PolygonSerializer = GeometrySerializer<Geometry<Point>, PolygonSerializerVisitor<Point>>;
/// Serializer backed by MultiPolygonSerializerVisitor.
template <typename Point>
using MultiPolygonSerializer = GeometrySerializer<Geometry<Point>, MultiPolygonSerializerVisitor<Point>>;
2020-06-14 17:04:10 +00:00
2020-06-07 12:33:49 +00:00
}