Merge pull request #67647 from jacobrec/multilinestring

Added support for reading MultiLineString WKTs
This commit is contained in:
Alexey Milovidov 2024-08-09 21:35:55 +00:00 committed by GitHub
commit a79eff9dcf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 255 additions and 5 deletions

View File

@ -52,6 +52,48 @@ Result:
└───────────────────────────────┴───────────────┘
```
## LineString
`LineString` is a line stored as an array of points: [Array](array.md)([Point](#point)).
**Example**
Query:
```sql
CREATE TABLE geo_linestring (l LineString) ENGINE = Memory();
INSERT INTO geo_linestring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
SELECT l, toTypeName(l) FROM geo_linestring;
```
Result:
``` text
┌─l─────────────────────────────┬─toTypeName(l)─┐
│ [(0,0),(10,0),(10,10),(0,10)] │ LineString    │
└───────────────────────────────┴───────────────┘
```
## MultiLineString
`MultiLineString` is multiple lines stored as an array of `LineString`: [Array](array.md)([LineString](#linestring)).
**Example**
Query:
```sql
CREATE TABLE geo_multilinestring (l MultiLineString) ENGINE = Memory();
INSERT INTO geo_multilinestring VALUES([[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (2, 2), (3, 3)]]);
SELECT l, toTypeName(l) FROM geo_multilinestring;
```
Result:
``` text
┌─l───────────────────────────────────────────────────┬─toTypeName(l)───┐
│ [[(0,0),(10,0),(10,10),(0,10)],[(1,1),(2,2),(3,3)]] │ MultiLineString │
└─────────────────────────────────────────────────────┴─────────────────┘
```
## Polygon
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes.

View File

@ -6,11 +6,13 @@ title: "Functions for Working with Polygons"
## WKT
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
- POINT
- POLYGON
- MULTIPOLYGON
- LINESTRING
- MULTILINESTRING
**Syntax**
@ -26,12 +28,16 @@ WKT(geo_data)
- [Ring](../../data-types/geo.md#ring)
- [Polygon](../../data-types/geo.md#polygon)
- [MultiPolygon](../../data-types/geo.md#multipolygon)
- [LineString](../../data-types/geo.md#linestring)
- [MultiLineString](../../data-types/geo.md#multilinestring)
**Returned value**
- WKT geometric object `POINT` is returned for a Point.
- WKT geometric object `POLYGON` is returned for a Polygon.
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
- WKT geometric object `LINESTRING` is returned for a LineString.
- WKT geometric object `MULTILINESTRING` is returned for a MultiLineString.
**Examples**
@ -84,7 +90,7 @@ SELECT
### Input parameters
String starting with `MULTIPOLYGON`
String starting with `MULTIPOLYGON`
### Returned value
@ -170,6 +176,34 @@ SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)');
[(1,1),(2,2),(3,3),(1,1)]
```
## readWKTMultiLineString
Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format.
### Syntax
```sql
readWKTMultiLineString(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a MultiLineString geometry.
### Returned value
The function returns a ClickHouse internal representation of the multilinestring geometry.
### Example
```sql
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');
```
```response
[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]]
```
## readWKTRing
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
@ -219,7 +253,7 @@ UInt8, 0 for false, 1 for true
## polygonsDistanceSpherical
Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise.
Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise.
### Example

View File

@ -24,6 +24,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory)
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeLineStringName>()));
});
// Custom type for multiple lines stored as Array(LineString)
factory.registerSimpleDataTypeCustom("MultiLineString", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(LineString)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiLineStringName>()));
});
// Custom type for simple polygon without holes stored as Array(Point)
factory.registerSimpleDataTypeCustom("Ring", []
{

View File

@ -17,6 +17,12 @@ public:
DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {}
};
/// Custom type name for the MultiLineString geo type.
/// Passes the fixed name "MultiLineString" to the DataTypeCustomFixedName base.
class DataTypeMultiLineStringName : public DataTypeCustomFixedName
{
public:
DataTypeMultiLineStringName() : DataTypeCustomFixedName("MultiLineString") {}
};
class DataTypeRingName : public DataTypeCustomFixedName
{
public:

View File

@ -31,6 +31,9 @@ namespace ErrorCodes
template <typename Point>
using LineString = boost::geometry::model::linestring<Point>;
/// A collection of LineString<Point> objects, modelled by boost::geometry::model::multi_linestring.
template <typename Point>
using MultiLineString = boost::geometry::model::multi_linestring<LineString<Point>>;
template <typename Point>
using Ring = boost::geometry::model::ring<Point>;
@ -42,12 +45,14 @@ using MultiPolygon = boost::geometry::model::multi_polygon<Polygon<Point>>;
using CartesianPoint = boost::geometry::model::d2::point_xy<Float64>;
using CartesianLineString = LineString<CartesianPoint>;
using CartesianMultiLineString = MultiLineString<CartesianPoint>;
using CartesianRing = Ring<CartesianPoint>;
using CartesianPolygon = Polygon<CartesianPoint>;
using CartesianMultiPolygon = MultiPolygon<CartesianPoint>;
using SphericalPoint = boost::geometry::model::point<Float64, 2, boost::geometry::cs::spherical_equatorial<boost::geometry::degree>>;
using SphericalLineString = LineString<SphericalPoint>;
using SphericalMultiLineString = MultiLineString<SphericalPoint>;
using SphericalRing = Ring<SphericalPoint>;
using SphericalPolygon = Polygon<SphericalPoint>;
using SphericalMultiPolygon = MultiPolygon<SphericalPoint>;
@ -113,6 +118,28 @@ struct ColumnToLineStringsConverter
}
};
/**
 * Class which converts Column with type Array(Array(Tuple(Float64, Float64))) to a vector of boost multi_linestring type.
 * The result contains exactly one multi_linestring per row of the input column.
 */
template <typename Point>
struct ColumnToMultiLineStringsConverter
{
    /// Converts every row of `col` into a MultiLineString.
    ///
    /// `col` is cast to ColumnArray; its nested data column is converted by ColumnToLineStringsConverter
    /// into one flat list of linestrings, and the array offsets are used to cut that list into rows.
    /// Rows that contain no linestrings yield an empty MultiLineString.
    static std::vector<MultiLineString<Point>> convert(ColumnPtr col)
    {
        const auto & column_array = typeid_cast<const ColumnArray &>(*col);
        const IColumn::Offsets & offsets = column_array.getOffsets();
        size_t prev_offset = 0;
        std::vector<MultiLineString<Point>> answer(offsets.size());
        auto all_linestrings = ColumnToLineStringsConverter<Point>::convert(column_array.getDataPtr());

        /// Iterate over ROWS, i.e. over offsets only. The number of rows is unrelated to the number of
        /// linestrings: bounding the row index by all_linestrings.size() would stop too early whenever
        /// some row is empty (e.g. offsets = [0, 1] would leave the second row unfilled).
        /// NOTE(review): assumes offsets[i] <= all_linestrings.size() for every row, i.e. the offsets
        /// are consistent with the nested column — confirm this is guaranteed by ColumnArray.
        for (size_t iter = 0; iter < offsets.size(); ++iter)
        {
            /// [prev_offset, offsets[iter]) is the slice of the flat list that belongs to this row.
            for (size_t linestring_iter = prev_offset; linestring_iter < offsets[iter]; ++linestring_iter)
                answer[iter].emplace_back(std::move(all_linestrings[linestring_iter]));
            prev_offset = offsets[iter];
        }
        return answer;
    }
};
/**
* Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type.
*/
@ -268,6 +295,38 @@ private:
ColumnUInt64::MutablePtr offsets;
};
/// Accumulates MultiLineString objects and emits them as a ColumnArray whose nested column
/// is produced by LineStringSerializer.
template <typename Point>
class MultiLineStringSerializer
{
public:
    MultiLineStringSerializer() : offsets(ColumnUInt64::create()) {}

    /// NOTE(review): the offsets column is created via ColumnUInt64::create(n) while add() still
    /// appends with insertValue — presumably this constructor is meant as a size hint; confirm.
    explicit MultiLineStringSerializer(size_t n) : offsets(ColumnUInt64::create(n)) {}

    /// Appends one multilinestring: records the new cumulative end offset first,
    /// then forwards each of its linestrings to the nested serializer.
    void add(const MultiLineString<Point> & multilinestring)
    {
        linestring_total += multilinestring.size();
        offsets->insertValue(linestring_total);

        for (const auto & part : multilinestring)
        {
            linestring_serializer.add(part);
        }
    }

    /// Builds the resulting array column from the nested serializer's column and the collected offsets.
    /// The offsets column is moved out of this object.
    ColumnPtr finalize()
    {
        return ColumnArray::create(linestring_serializer.finalize(), std::move(offsets));
    }

private:
    /// Running count of linestrings added so far; the value written as the latest offset.
    size_t linestring_total = 0;
    LineStringSerializer<Point> linestring_serializer;
    ColumnUInt64::MutablePtr offsets;
};
/// Almost the same as LineStringSerializer
/// Serialize Point, Ring as Ring
template <typename Point>
@ -411,6 +470,11 @@ static void callOnGeometryDataType(DataTypePtr type, F && f)
else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString")
return f(ConverterType<ColumnToLineStringsConverter<Point>>());
/// We should take the custom type name into consideration to avoid ambiguity,
/// because, for example, both MultiLineString and Polygon are resolved to Array(Array(Point)).
else if (factory.get("MultiLineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "MultiLineString")
return f(ConverterType<ColumnToMultiLineStringsConverter<Point>>());
/// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring.
else if (factory.get("Ring")->equals(*type))
return f(ConverterType<ColumnToRingsConverter<Point>>());

View File

@ -75,6 +75,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -73,6 +73,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -73,6 +73,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -77,6 +77,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -87,6 +87,11 @@ struct ReadWKTLineStringNameHolder
static constexpr const char * name = "readWKTLineString";
};
/// Name holder used as the last template argument of FunctionReadWKT
/// when the readWKTMultiLineString function is registered.
struct ReadWKTMultiLineStringNameHolder
{
    static constexpr auto name = "readWKTMultiLineString";
};
struct ReadWKTRingNameHolder
{
static constexpr const char * name = "readWKTRing";
@ -131,6 +136,31 @@ Parses a Well-Known Text (WKT) representation of a LineString geometry and retur
},
.categories{"Unique identifiers"}
});
/// Registers readWKTMultiLineString together with its embedded documentation.
/// The examples below must call readWKTMultiLineString itself; the second one previously
/// referenced readWKTLineString while showing the MultiLineString type name as its result.
factory.registerFunction<FunctionReadWKT<DataTypeMultiLineStringName, CartesianMultiLineString, MultiLineStringSerializer<CartesianPoint>, ReadWKTMultiLineStringNameHolder>>(FunctionDocumentation
{
    .description=R"(
Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format.
)",
    .syntax = "readWKTMultiLineString(wkt_string)",
    .arguments{
        {"wkt_string", "The input WKT string representing a MultiLineString geometry."}
    },
    .returned_value = "The function returns a ClickHouse internal representation of the multilinestring geometry.",
    .examples{
        {"first call", "SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');", R"(
readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')
[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]]
)"},
        {"second call", "SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));", R"(
toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'))
MultiLineString
)"},
    },
    /// NOTE(review): "Unique identifiers" looks copied from an unrelated function family;
    /// confirm the intended category for geo/WKT functions before changing it.
    .categories{"Unique identifiers"}
});
factory.registerFunction<FunctionReadWKT<DataTypeRingName, CartesianRing, RingSerializer<CartesianPoint>, ReadWKTRingNameHolder>>();
factory.registerFunction<FunctionReadWKT<DataTypePolygonName, CartesianPolygon, PolygonSerializer<CartesianPoint>, ReadWKTPolygonNameHolder>>();
factory.registerFunction<FunctionReadWKT<DataTypeMultiPolygonName, CartesianMultiPolygon, MultiPolygonSerializer<CartesianPoint>, ReadWKTMultiPolygonNameHolder>>();

View File

@ -0,0 +1,30 @@
-- { echoOn }
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
[[(1,1),(2,2),(3,3),(1,1)]]
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
MultiLineString
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
MULTILINESTRING((1 1,2 2,3 3,1 1))
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
[[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]]
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
MultiLineString
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
MULTILINESTRING((1 1,2 2,3 3,1 1),(1 0,2 0,3 0))
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x
SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y);
POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon
-- Non constant tests
DROP TABLE IF EXISTS t;
CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory;
INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))');
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64)))
select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord;
POLYGON((1 1,2 2,3 3,1 1)) [[(1,1),(2,2),(3,3),(1,1)]] 1
POLYGON((1 1,2 2,3 3,1 1),(1 0,2 0,3 0,1 0)) [[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] 1
POLYGON((1 0,2 1,3 0,4 1,5 0,6 1,7 0,8 1,9 0,10 1,1 0)) [[(1,0),(2,1),(3,0),(4,1),(5,0),(6,1),(7,0),(8,1),(9,0),(10,1)]] 1

View File

@ -0,0 +1,26 @@
-- { echoOn }
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x
SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y);
-- Non constant tests
DROP TABLE IF EXISTS t;
CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory;
INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))');
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64)))
select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord;

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2942
personal_ws-1.1 en 2942
AArch
ACLs
ALTERs
@ -562,6 +562,7 @@ MindsDB
Mongodb
Monotonicity
MsgPack
MultiLineString
MultiPolygon
Multiline
Multiqueries
@ -2090,6 +2091,7 @@ multiSearchFirstPositionUTF
multibyte
multidirectory
multiline
multilinestring
multiplyDecimal
multipolygon
multisearchany
@ -2367,6 +2369,7 @@ rankCorr
rapidjson
rawblob
readWKTLineString
readWKTMultiLineString
readWKTMultiPolygon
readWKTPoint
readWKTPolygon