Address latest comments

This commit is contained in:
achulkov2 2020-02-05 19:38:03 +03:00
parent 1f97500756
commit 0ae0e03d52

View File

@ -7,6 +7,8 @@
#include "DictionaryBlockInputStream.h" #include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
#include <numeric>
namespace DB namespace DB
{ {
@ -445,23 +447,52 @@ void IPolygonDictionary::getItemsImpl(
namespace namespace
{ {
inline void makeDifferences(IColumn::Offsets & values)
{
for (size_t i = 1; i < values.size(); ++i)
values[i] -= values[i - 1];
}
struct Offset struct Offset
{ {
Offset() = default; Offset() = default;
IColumn::Offsets ring_sizes; IColumn::Offsets ring_offsets;
IColumn::Offsets polygon_sizes; IColumn::Offsets polygon_offsets;
IColumn::Offsets multi_polygon_sizes; IColumn::Offsets multi_polygon_offsets;
IColumn::Offset points_added = 0;
IColumn::Offset current_ring = 0; IColumn::Offset current_ring = 0;
IColumn::Offset current_polygon = 0; IColumn::Offset current_polygon = 0;
IColumn::Offset current_multi_polygon = 0; IColumn::Offset current_multi_polygon = 0;
Offset& operator++()
{
++points_added;
if (points_added <= ring_offsets[current_ring])
return *this;
++current_ring;
if (current_ring < polygon_offsets[current_polygon])
return *this;
++current_polygon;
if (current_polygon < multi_polygon_offsets[current_multi_polygon])
return *this;
++current_multi_polygon;
return *this;
}
bool atLastPolygonOfMultiPolygon() { return current_polygon + 1 == multi_polygon_offsets[current_multi_polygon]; }
bool atLastRingOfPolygon() { return current_ring + 1 == polygon_offsets[current_polygon]; }
bool atLastPointOfRing() { return points_added == ring_offsets[current_ring]; }
bool allRingsHaveAPositiveArea()
{
IColumn::Offset prev_offset = 0;
for (const auto offset : ring_offsets)
{
if (offset - prev_offset < 3)
return false;
prev_offset = offset;
}
return true;
}
}; };
struct Data struct Data
@ -474,72 +505,32 @@ struct Data
dest.emplace_back(); dest.emplace_back();
ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon)); ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon));
} }
void addPoint(Float64 x, Float64 y)
{
auto & last_polygon = dest.back();
auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back());
last_ring.emplace_back(x, y);
}
}; };
void addNewMultiPolygon(Data &, Offset & offset) void addNewPoint(Float64 x, Float64 y, Data & data, Offset & offset)
{ {
++offset.current_multi_polygon; if (offset.atLastPointOfRing())
if (offset.current_multi_polygon == offset.multi_polygon_sizes.size())
throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS};
}
void addNewPolygon(Data & data, Offset & offset)
{
++offset.current_polygon;
if (offset.current_polygon == offset.polygon_sizes.size())
throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS};
if (--offset.multi_polygon_sizes[offset.current_multi_polygon] == 0)
{ {
addNewMultiPolygon(data, offset); if (offset.atLastRingOfPolygon())
data.addPolygon(true); data.addPolygon(offset.atLastPolygonOfMultiPolygon());
else
{
/** An outer ring is added automatically with a new polygon, thus we need the else statement here.
* This also implies that if we are at this point we have to add an inner ring.
*/
auto & last_polygon = data.dest.back();
last_polygon.inners().emplace_back();
}
} }
else data.addPoint(x, y);
data.addPolygon(); ++offset;
}
void addNewRing(Data & data, Offset & offset)
{
++offset.current_ring;
if (offset.current_ring == offset.ring_sizes.size())
throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS};
if (--offset.polygon_sizes[offset.current_polygon] == 0)
addNewPolygon(data, offset);
else
{
/** An outer ring is added automatically with a new polygon, thus we need the else statement here.
* This also implies that if we are at this point we have to add an inner ring.
*/
auto & last_polygon = data.dest.back();
last_polygon.inners().emplace_back();
}
}
void addNewPointFromMultiPolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset)
{
if (offset.ring_sizes[offset.current_ring] == 0)
addNewRing(data, offset);
auto & last_polygon = data.dest.back();
auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back());
last_ring.push_back(pt);
--offset.ring_sizes[offset.current_ring];
}
void addNewPointFromSimplePolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset)
{
if (offset.polygon_sizes[offset.current_polygon] == 0)
{
++offset.current_polygon;
if (offset.current_polygon == offset.polygon_sizes.size())
throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS};
}
auto & last_polygon = data.dest.back();
last_polygon.outer().push_back(pt);
--offset.polygon_sizes[offset.current_polygon];
} }
const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset) const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset)
@ -547,20 +538,17 @@ const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset)
const auto ptr_multi_polygons = typeid_cast<const ColumnArray*>(column.get()); const auto ptr_multi_polygons = typeid_cast<const ColumnArray*>(column.get());
if (!ptr_multi_polygons) if (!ptr_multi_polygons)
throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH};
offset.multi_polygon_sizes.assign(ptr_multi_polygons->getOffsets()); offset.multi_polygon_offsets.assign(ptr_multi_polygons->getOffsets());
makeDifferences(offset.multi_polygon_sizes);
const auto ptr_polygons = typeid_cast<const ColumnArray*>(&ptr_multi_polygons->getData()); const auto ptr_polygons = typeid_cast<const ColumnArray*>(&ptr_multi_polygons->getData());
if (!ptr_polygons) if (!ptr_polygons)
throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH};
offset.polygon_sizes.assign(ptr_polygons->getOffsets()); offset.polygon_offsets.assign(ptr_polygons->getOffsets());
makeDifferences(offset.polygon_sizes);
const auto ptr_rings = typeid_cast<const ColumnArray*>(&ptr_polygons->getData()); const auto ptr_rings = typeid_cast<const ColumnArray*>(&ptr_polygons->getData());
if (!ptr_rings) if (!ptr_rings)
throw Exception{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; throw Exception{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH};
offset.ring_sizes.assign(ptr_rings->getOffsets()); offset.ring_offsets.assign(ptr_rings->getOffsets());
makeDifferences(offset.ring_sizes);
return ptr_rings->getDataPtr().get(); return ptr_rings->getDataPtr().get();
} }
@ -570,11 +558,14 @@ const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset)
const auto ptr_polygons = typeid_cast<const ColumnArray*>(column.get()); const auto ptr_polygons = typeid_cast<const ColumnArray*>(column.get());
if (!ptr_polygons) if (!ptr_polygons)
throw Exception{"Expected a column containing arrays of points", ErrorCodes::TYPE_MISMATCH}; throw Exception{"Expected a column containing arrays of points", ErrorCodes::TYPE_MISMATCH};
offset.polygon_sizes.assign(ptr_polygons->getOffsets()); offset.ring_offsets.assign(ptr_polygons->getOffsets());
std::iota(offset.polygon_offsets.begin(), offset.polygon_offsets.end(), 1);
offset.multi_polygon_offsets.assign(offset.polygon_offsets);
return ptr_polygons->getDataPtr().get(); return ptr_polygons->getDataPtr().get();
} }
void getPointsReprByArrays(const IColumn * column, std::vector<IPolygonDictionary::Point> & dest) void handlePointsReprByArrays(const IColumn * column, Data & data, Offset & offset)
{ {
const auto ptr_points = typeid_cast<const ColumnArray*>(column); const auto ptr_points = typeid_cast<const ColumnArray*>(column);
const auto ptr_coord = typeid_cast<const ColumnVector<Float64>*>(&ptr_points->getData()); const auto ptr_coord = typeid_cast<const ColumnVector<Float64>*>(&ptr_points->getData());
@ -587,11 +578,11 @@ void getPointsReprByArrays(const IColumn * column, std::vector<IPolygonDictionar
if (offsets[i] - prev_offset != 2) if (offsets[i] - prev_offset != 2)
throw Exception{"All points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"All points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS};
prev_offset = offsets[i]; prev_offset = offsets[i];
dest.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); addNewPoint(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1), data, offset);
} }
} }
void getPointReprByTuples(const IColumn * column, std::vector<IPolygonDictionary::Point> & dest) void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offset)
{ {
const auto ptr_points = typeid_cast<const ColumnTuple*>(column); const auto ptr_points = typeid_cast<const ColumnTuple*>(column);
if (!ptr_points) if (!ptr_points)
@ -604,7 +595,7 @@ void getPointReprByTuples(const IColumn * column, std::vector<IPolygonDictionary
throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH}; throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH};
for (size_t i = 0; i < column_x->size(); ++i) for (size_t i = 0; i < column_x->size(); ++i)
{ {
dest.emplace_back(column_x->getElement(i), column_y->getElement(i)); addNewPoint(column_x->getElement(i), column_y->getElement(i), data, offset);
} }
} }
@ -626,31 +617,22 @@ void IPolygonDictionary::extractPolygons(const ColumnPtr &column)
break; break;
} }
if (!offset.allRingsHaveAPositiveArea())
throw Exception{"Every ring included in a polygon or excluded from it should contain at least 3 points",
ErrorCodes::BAD_ARGUMENTS};
/** Adding the first empty polygon */ /** Adding the first empty polygon */
data.addPolygon(true); data.addPolygon(true);
std::vector<Point> points;
switch (point_type) switch (point_type)
{ {
case PointType::Array: case PointType::Array:
getPointsReprByArrays(points_collection, points); handlePointsReprByArrays(points_collection, data, offset);
break; break;
case PointType::Tuple: case PointType::Tuple:
getPointReprByTuples(points_collection, points); handlePointsReprByTuples(points_collection, data, offset);
break; break;
} }
for (auto & point : points)
{
switch (input_type)
{
case InputType::MultiPolygon:
addNewPointFromMultiPolygon(point, data, offset);
break;
case InputType::SimplePolygon:
addNewPointFromSimplePolygon(point, data, offset);
break;
}
}
} }
SimplePolygonDictionary::SimplePolygonDictionary( SimplePolygonDictionary::SimplePolygonDictionary(