2019-12-02 15:26:59 +00:00
|
|
|
#include "PolygonDictionary.h"
|
|
|
|
#include "DictionaryBlockInputStream.h"
|
|
|
|
#include "DictionaryFactory.h"
|
|
|
|
|
2020-02-18 13:30:09 +00:00
|
|
|
#include <Columns/ColumnArray.h>
|
|
|
|
#include <Columns/ColumnTuple.h>
|
|
|
|
#include <DataTypes/DataTypeArray.h>
|
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
#include <numeric>
|
|
|
|
|
2019-12-02 15:26:59 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown from this file; they are only declared here.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int TYPE_MISMATCH;
    extern const int UNSUPPORTED_METHOD;
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Stores the dictionary configuration and immediately loads the dictionary:
  * first the attribute metadata is prepared, then all data is read from the source.
  */
IPolygonDictionary::IPolygonDictionary(
    const StorageID & dict_id_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    InputType input_type_,
    PointType point_type_)
    : IDictionaryBase(dict_id_)
    , dict_struct(dict_struct_)
    , source_ptr(std::move(source_ptr_))
    , dict_lifetime(dict_lifetime_)
    , input_type(input_type_)
    , point_type(point_type_)
{
    /// Attribute slots and null values must exist before any block is consumed.
    createAttributes();
    loadData();
}
|
|
|
|
|
|
|
|
std::string IPolygonDictionary::getTypeName() const
|
|
|
|
{
|
|
|
|
return "Polygon";
|
|
|
|
}
|
|
|
|
|
2019-12-16 15:46:51 +00:00
|
|
|
std::string IPolygonDictionary::getKeyDescription() const
|
|
|
|
{
|
|
|
|
return dict_struct.getKeyDescription();
|
|
|
|
}
|
|
|
|
|
2019-12-02 15:26:59 +00:00
|
|
|
size_t IPolygonDictionary::getBytesAllocated() const
|
|
|
|
{
|
|
|
|
return bytes_allocated;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t IPolygonDictionary::getQueryCount() const
|
|
|
|
{
|
|
|
|
return query_count.load(std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
|
|
|
|
double IPolygonDictionary::getHitRate() const
|
|
|
|
{
|
|
|
|
return 1.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t IPolygonDictionary::getElementCount() const
|
|
|
|
{
|
|
|
|
return element_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
double IPolygonDictionary::getLoadFactor() const
|
|
|
|
{
|
|
|
|
return 1.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Non-owning pointer to the source this dictionary loads from.
const IDictionarySource * IPolygonDictionary::getSource() const
{
    return source_ptr.get();
}
|
|
|
|
|
|
|
|
/// Lifetime settings the dictionary was created with.
const DictionaryLifetime & IPolygonDictionary::getLifetime() const
{
    return dict_lifetime;
}
|
|
|
|
|
|
|
|
/// Structure (key and attributes) the dictionary was created with.
const DictionaryStructure & IPolygonDictionary::getStructure() const
{
    return dict_struct;
}
|
|
|
|
|
|
|
|
bool IPolygonDictionary::isInjective(const std::string &) const
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-12-27 10:57:32 +00:00
|
|
|
/** Reading the dictionary contents back as a stream is not supported; this always throws UNSUPPORTED_METHOD.
  * TODO: supporting it first requires the ability to retrieve arrays from dictionaries,
  * which is believed to be a separate task handled by other people.
  */
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
{
    throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD};
}
|
|
|
|
|
2019-12-26 15:21:49 +00:00
|
|
|
template <typename T>
|
|
|
|
void IPolygonDictionary::appendNullValueImpl(const Field & null_value)
|
|
|
|
{
|
|
|
|
null_values.emplace_back(T(null_value.get<NearestFieldType<T>>()));
|
|
|
|
}
|
|
|
|
|
2019-12-27 13:19:39 +00:00
|
|
|
/** Appends the null value of an attribute to null_values, converted to the C++ type
  * that corresponds to the attribute's underlying type.
  */
void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Field & null_value)
{
    /// Every case only differs in the type name, so the cases are generated.
#define APPEND_NULL_VALUE_CASE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        appendNullValueImpl<TYPE>(null_value); \
        break;

    switch (type)
    {
        APPEND_NULL_VALUE_CASE(UInt8)
        APPEND_NULL_VALUE_CASE(UInt16)
        APPEND_NULL_VALUE_CASE(UInt32)
        APPEND_NULL_VALUE_CASE(UInt64)
        APPEND_NULL_VALUE_CASE(UInt128)
        APPEND_NULL_VALUE_CASE(Int8)
        APPEND_NULL_VALUE_CASE(Int16)
        APPEND_NULL_VALUE_CASE(Int32)
        APPEND_NULL_VALUE_CASE(Int64)
        APPEND_NULL_VALUE_CASE(Float32)
        APPEND_NULL_VALUE_CASE(Float64)
        APPEND_NULL_VALUE_CASE(Decimal32)
        APPEND_NULL_VALUE_CASE(Decimal64)
        APPEND_NULL_VALUE_CASE(Decimal128)
        APPEND_NULL_VALUE_CASE(String)
    }

#undef APPEND_NULL_VALUE_CASE
}
|
|
|
|
|
2019-12-27 13:06:03 +00:00
|
|
|
void IPolygonDictionary::createAttributes()
|
|
|
|
{
|
2019-12-23 13:23:11 +00:00
|
|
|
attributes.resize(dict_struct.attributes.size());
|
2019-12-02 15:26:59 +00:00
|
|
|
for (size_t i = 0; i < dict_struct.attributes.size(); ++i)
|
|
|
|
{
|
2019-12-26 15:21:49 +00:00
|
|
|
const auto & attr = dict_struct.attributes[i];
|
|
|
|
attribute_index_by_name.emplace(attr.name, i);
|
|
|
|
|
|
|
|
appendNullValue(attr.underlying_type, attr.null_value);
|
2019-12-02 15:26:59 +00:00
|
|
|
|
2019-12-26 15:21:49 +00:00
|
|
|
if (attr.hierarchical)
|
2020-07-14 20:32:13 +00:00
|
|
|
throw Exception{ErrorCodes::TYPE_MISMATCH,
|
|
|
|
"{}: hierarchical attributes not supported for dictionary of polygonal type",
|
|
|
|
getDictionaryID().getNameForLogs()};
|
2019-12-02 15:26:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-24 18:00:03 +00:00
|
|
|
/** Consumes one block read from the source.
  * Column 0 of the block is the key (the polygons); column i + 1 holds the values of attribute i.
  * Attribute values are appended to the stored columns and the polygons are extracted from the key.
  */
void IPolygonDictionary::blockToAttributes(const DB::Block & block)
{
    const auto rows = block.rows();
    element_count += rows;

    for (size_t attr_idx = 0; attr_idx < attributes.size(); ++attr_idx)
    {
        const auto & incoming = block.safeGetByPosition(attr_idx + 1);
        auto & stored = attributes[attr_idx];

        /// First block for this attribute: just take the column as is.
        if (!stored)
        {
            stored = incoming.column;
            continue;
        }

        /// Otherwise append the new values to what has been accumulated so far.
        MutableColumnPtr mutated = IColumn::mutate(std::move(stored));
        mutated->insertRangeFrom(*incoming.column, 0, incoming.column->size());
        stored = std::move(mutated);
    }

    /** Multi-polygons could cause bigger sizes, but this is better than nothing. */
    polygons.reserve(polygons.size() + rows);
    ids.reserve(ids.size() + rows);

    const auto & key = block.safeGetByPosition(0).column;
    extractPolygons(key);
}
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
void IPolygonDictionary::loadData()
|
|
|
|
{
|
2019-12-02 15:26:59 +00:00
|
|
|
auto stream = source_ptr->loadAll();
|
|
|
|
stream->readPrefix();
|
2019-12-24 18:21:50 +00:00
|
|
|
while (const auto block = stream->read())
|
2019-12-02 15:26:59 +00:00
|
|
|
blockToAttributes(block);
|
|
|
|
stream->readSuffix();
|
2020-01-27 17:08:29 +00:00
|
|
|
|
2020-05-06 16:21:51 +00:00
|
|
|
std::vector<double> areas;
|
2020-02-18 13:30:09 +00:00
|
|
|
areas.reserve(polygons.size());
|
2020-05-06 16:21:51 +00:00
|
|
|
|
|
|
|
std::vector<std::pair<Polygon, size_t>> polygon_ids;
|
|
|
|
polygon_ids.reserve(polygons.size());
|
2020-05-06 18:01:39 +00:00
|
|
|
for (size_t i = 0; i < polygons.size(); ++i)
|
|
|
|
{
|
2020-05-06 16:21:51 +00:00
|
|
|
auto & polygon = polygons[i];
|
2020-01-27 17:08:29 +00:00
|
|
|
bg::correct(polygon);
|
2020-02-18 13:30:09 +00:00
|
|
|
areas.push_back(bg::area(polygon));
|
2020-05-06 16:21:51 +00:00
|
|
|
polygon_ids.emplace_back(polygon, i);
|
|
|
|
}
|
2020-05-06 18:01:39 +00:00
|
|
|
sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
|
|
|
|
{
|
2020-05-06 16:21:51 +00:00
|
|
|
return areas[lhs.second] < areas[rhs.second];
|
|
|
|
});
|
|
|
|
std::vector<size_t> correct_ids;
|
2020-05-06 16:41:14 +00:00
|
|
|
correct_ids.reserve(polygon_ids.size());
|
2020-05-06 18:01:39 +00:00
|
|
|
for (size_t i = 0; i < polygon_ids.size(); ++i)
|
|
|
|
{
|
2020-05-06 16:21:51 +00:00
|
|
|
auto & polygon = polygon_ids[i];
|
|
|
|
correct_ids.emplace_back(ids[polygon.second]);
|
|
|
|
polygons[i] = polygon.first;
|
2020-02-18 13:30:09 +00:00
|
|
|
}
|
2020-05-06 16:21:51 +00:00
|
|
|
ids = correct_ids;
|
2019-12-02 15:26:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void IPolygonDictionary::calculateBytesAllocated()
|
|
|
|
{
|
2019-12-23 13:43:12 +00:00
|
|
|
// TODO:: Account for key.
|
|
|
|
for (const auto & column : attributes)
|
|
|
|
bytes_allocated += column->allocatedBytes();
|
2019-12-02 15:26:59 +00:00
|
|
|
}
|
|
|
|
|
2020-05-24 18:00:03 +00:00
|
|
|
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns & key_columns)
|
2019-12-23 13:23:11 +00:00
|
|
|
{
|
2020-01-27 17:08:29 +00:00
|
|
|
if (key_columns.size() != 2)
|
|
|
|
throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS};
|
2020-04-22 07:03:43 +00:00
|
|
|
const auto * column_x = typeid_cast<const ColumnVector<Float64>*>(key_columns[0].get());
|
|
|
|
const auto * column_y = typeid_cast<const ColumnVector<Float64>*>(key_columns[1].get());
|
2019-12-23 13:23:11 +00:00
|
|
|
if (!column_x || !column_y)
|
|
|
|
throw Exception{"Expected columns of Float64", ErrorCodes::TYPE_MISMATCH};
|
2019-12-02 15:26:59 +00:00
|
|
|
const auto rows = key_columns.front()->size();
|
2019-12-23 13:23:11 +00:00
|
|
|
std::vector<Point> result;
|
|
|
|
result.reserve(rows);
|
2019-12-02 15:26:59 +00:00
|
|
|
for (const auto row : ext::range(0, rows))
|
2019-12-23 13:23:11 +00:00
|
|
|
result.emplace_back(column_x->getElement(row), column_y->getElement(row));
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-05-24 18:00:03 +00:00
|
|
|
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
|
2019-12-27 10:57:32 +00:00
|
|
|
{
|
2019-12-23 13:23:11 +00:00
|
|
|
size_t row = 0;
|
|
|
|
for (const auto & pt : extractPoints(key_columns))
|
2019-12-02 15:26:59 +00:00
|
|
|
{
|
2019-12-23 13:23:11 +00:00
|
|
|
size_t trash = 0;
|
2019-12-02 15:26:59 +00:00
|
|
|
out[row] = find(pt, trash);
|
2019-12-23 13:23:11 +00:00
|
|
|
++row;
|
|
|
|
}
|
2019-12-25 17:32:02 +00:00
|
|
|
|
2019-12-25 18:49:27 +00:00
|
|
|
query_count.fetch_add(row, std::memory_order_relaxed);
|
2019-12-23 13:23:11 +00:00
|
|
|
}
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
|
|
|
|
{
|
2019-12-23 13:23:11 +00:00
|
|
|
const auto it = attribute_index_by_name.find(attribute_name);
|
|
|
|
if (it == attribute_index_by_name.end())
|
|
|
|
throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS};
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void IPolygonDictionary::get##TYPE( \
|
2019-12-23 13:41:35 +00:00
|
|
|
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
|
2019-12-23 13:23:11 +00:00
|
|
|
{ \
|
|
|
|
const auto ind = getAttributeIndex(attribute_name); \
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
2019-12-23 13:23:11 +00:00
|
|
|
\
|
2019-12-26 15:21:49 +00:00
|
|
|
const auto null_value = std::get<TYPE>(null_values[ind]); \
|
2019-12-23 13:23:11 +00:00
|
|
|
\
|
|
|
|
getItemsImpl<TYPE, TYPE>( \
|
|
|
|
ind, \
|
|
|
|
key_columns, \
|
|
|
|
[&](const size_t row, const auto value) { out[row] = value; }, \
|
|
|
|
[&](const size_t) { return null_value; }); \
|
|
|
|
}
|
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
|
|
|
void IPolygonDictionary::getString(
|
2019-12-23 13:41:35 +00:00
|
|
|
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
|
2019-12-23 13:23:11 +00:00
|
|
|
{
|
|
|
|
const auto ind = getAttributeIndex(attribute_name);
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-26 15:21:49 +00:00
|
|
|
const auto & null_value = StringRef{std::get<String>(null_values[ind])};
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-26 13:23:04 +00:00
|
|
|
getItemsImpl<String, StringRef>(
|
2019-12-23 13:23:11 +00:00
|
|
|
ind,
|
|
|
|
key_columns,
|
2019-12-26 13:23:04 +00:00
|
|
|
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
|
2019-12-23 13:23:11 +00:00
|
|
|
[&](const size_t) { return null_value; });
|
|
|
|
}
|
|
|
|
|
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void IPolygonDictionary::get##TYPE( \
|
|
|
|
const std::string & attribute_name, \
|
|
|
|
const Columns & key_columns, \
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &, \
|
2019-12-23 13:23:11 +00:00
|
|
|
const PaddedPODArray<TYPE> & def, \
|
|
|
|
ResultArrayType<TYPE> & out) const \
|
|
|
|
{ \
|
|
|
|
const auto ind = getAttributeIndex(attribute_name); \
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
2019-12-23 13:23:11 +00:00
|
|
|
\
|
|
|
|
getItemsImpl<TYPE, TYPE>( \
|
|
|
|
ind, \
|
|
|
|
key_columns, \
|
|
|
|
[&](const size_t row, const auto value) { out[row] = value; }, \
|
|
|
|
[&](const size_t row) { return def[row]; }); \
|
|
|
|
}
|
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
|
|
|
void IPolygonDictionary::getString(
|
|
|
|
const std::string & attribute_name,
|
|
|
|
const Columns & key_columns,
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &,
|
2019-12-23 13:23:11 +00:00
|
|
|
const ColumnString * const def,
|
|
|
|
ColumnString * const out) const
|
|
|
|
{
|
|
|
|
const auto ind = getAttributeIndex(attribute_name);
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-25 16:31:57 +00:00
|
|
|
getItemsImpl<String, StringRef>(
|
2019-12-23 13:23:11 +00:00
|
|
|
ind,
|
|
|
|
key_columns,
|
|
|
|
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
|
|
|
[&](const size_t row) { return def->getDataAt(row); });
|
|
|
|
}
|
|
|
|
|
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void IPolygonDictionary::get##TYPE( \
|
|
|
|
const std::string & attribute_name, \
|
|
|
|
const Columns & key_columns, \
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &, \
|
2019-12-23 13:23:11 +00:00
|
|
|
const TYPE def, \
|
|
|
|
ResultArrayType<TYPE> & out) const \
|
|
|
|
{ \
|
|
|
|
const auto ind = getAttributeIndex(attribute_name); \
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
2019-12-23 13:23:11 +00:00
|
|
|
\
|
|
|
|
getItemsImpl<TYPE, TYPE>( \
|
|
|
|
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
|
|
|
}
|
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
|
|
|
void IPolygonDictionary::getString(
|
|
|
|
const std::string & attribute_name,
|
|
|
|
const Columns & key_columns,
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &,
|
2019-12-23 13:23:11 +00:00
|
|
|
const String & def,
|
|
|
|
ColumnString * const out) const
|
|
|
|
{
|
|
|
|
const auto ind = getAttributeIndex(attribute_name);
|
2020-07-14 18:46:54 +00:00
|
|
|
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-25 16:31:57 +00:00
|
|
|
getItemsImpl<String, StringRef>(
|
2019-12-23 13:23:11 +00:00
|
|
|
ind,
|
|
|
|
key_columns,
|
|
|
|
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
|
|
|
[&](const size_t) { return StringRef{def}; });
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
|
|
|
void IPolygonDictionary::getItemsImpl(
|
|
|
|
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
|
|
|
{
|
|
|
|
const auto points = extractPoints(key_columns);
|
|
|
|
|
2019-12-26 13:23:04 +00:00
|
|
|
using ColVecType = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
|
|
|
|
using ColType = std::conditional_t<std::is_same<AttributeType, String>::value, ColumnString, ColVecType>;
|
|
|
|
const auto column = typeid_cast<const ColType *>(attributes[attribute_ind].get());
|
|
|
|
if (!column)
|
2020-01-14 18:32:47 +00:00
|
|
|
throw Exception{"An attribute should be a column of its type", ErrorCodes::BAD_ARGUMENTS};
|
2019-12-23 13:23:11 +00:00
|
|
|
for (const auto i : ext::range(0, points.size()))
|
|
|
|
{
|
|
|
|
size_t id = 0;
|
2019-12-26 13:23:04 +00:00
|
|
|
const auto found = find(points[i], id);
|
2020-01-28 13:25:55 +00:00
|
|
|
id = ids[id];
|
2019-12-26 13:23:04 +00:00
|
|
|
if (!found)
|
|
|
|
{
|
2019-12-26 15:21:49 +00:00
|
|
|
set_value(i, static_cast<OutputType>(get_default(i)));
|
2019-12-26 13:23:04 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if constexpr (std::is_same<AttributeType, String>::value)
|
|
|
|
set_value(i, static_cast<OutputType>(column->getDataAt(id)));
|
|
|
|
else
|
|
|
|
set_value(i, static_cast<OutputType>(column->getElement(id)));
|
2019-12-02 15:26:59 +00:00
|
|
|
}
|
2019-12-23 13:23:11 +00:00
|
|
|
|
|
|
|
query_count.fetch_add(points.size(), std::memory_order_relaxed);
|
2019-12-02 15:26:59 +00:00
|
|
|
}
|
|
|
|
|
2020-01-14 18:32:47 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2020-01-27 17:08:29 +00:00
|
|
|
struct Offset
|
|
|
|
{
|
|
|
|
Offset() = default;
|
2020-01-30 16:35:33 +00:00
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
IColumn::Offsets ring_offsets;
|
|
|
|
IColumn::Offsets polygon_offsets;
|
|
|
|
IColumn::Offsets multi_polygon_offsets;
|
2020-01-30 16:35:33 +00:00
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
IColumn::Offset points_added = 0;
|
2020-01-30 16:35:33 +00:00
|
|
|
IColumn::Offset current_ring = 0;
|
|
|
|
IColumn::Offset current_polygon = 0;
|
|
|
|
IColumn::Offset current_multi_polygon = 0;
|
2020-02-05 16:38:03 +00:00
|
|
|
|
|
|
|
Offset& operator++()
|
|
|
|
{
|
|
|
|
++points_added;
|
|
|
|
if (points_added <= ring_offsets[current_ring])
|
|
|
|
return *this;
|
|
|
|
|
|
|
|
++current_ring;
|
|
|
|
if (current_ring < polygon_offsets[current_polygon])
|
|
|
|
return *this;
|
|
|
|
|
|
|
|
++current_polygon;
|
|
|
|
if (current_polygon < multi_polygon_offsets[current_multi_polygon])
|
|
|
|
return *this;
|
|
|
|
|
|
|
|
++current_multi_polygon;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool atLastPolygonOfMultiPolygon() { return current_polygon + 1 == multi_polygon_offsets[current_multi_polygon]; }
|
|
|
|
bool atLastRingOfPolygon() { return current_ring + 1 == polygon_offsets[current_polygon]; }
|
|
|
|
bool atLastPointOfRing() { return points_added == ring_offsets[current_ring]; }
|
|
|
|
|
|
|
|
bool allRingsHaveAPositiveArea()
|
|
|
|
{
|
|
|
|
IColumn::Offset prev_offset = 0;
|
|
|
|
for (const auto offset : ring_offsets)
|
|
|
|
{
|
|
|
|
if (offset - prev_offset < 3)
|
|
|
|
return false;
|
|
|
|
prev_offset = offset;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2020-01-27 17:08:29 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Data
|
|
|
|
{
|
|
|
|
std::vector<IPolygonDictionary::Polygon> & dest;
|
|
|
|
std::vector<size_t> & ids;
|
|
|
|
|
2020-01-28 14:21:02 +00:00
|
|
|
void addPolygon(bool new_multi_polygon = false)
|
|
|
|
{
|
2020-01-27 17:08:29 +00:00
|
|
|
dest.emplace_back();
|
2020-01-28 13:25:55 +00:00
|
|
|
ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon));
|
2020-01-27 17:08:29 +00:00
|
|
|
}
|
|
|
|
|
2020-05-13 23:20:06 +00:00
|
|
|
void addPoint(IPolygonDictionary::Coord x, IPolygonDictionary::Coord y)
|
2020-01-27 17:08:29 +00:00
|
|
|
{
|
2020-02-05 16:38:03 +00:00
|
|
|
auto & last_polygon = dest.back();
|
|
|
|
auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back());
|
|
|
|
last_ring.emplace_back(x, y);
|
2020-01-27 17:08:29 +00:00
|
|
|
}
|
2020-02-05 16:38:03 +00:00
|
|
|
};
|
2020-01-14 18:32:47 +00:00
|
|
|
|
2020-05-13 23:20:06 +00:00
|
|
|
/** Adds one point to the data being built, first opening a new polygon or a new inner ring
  * when the previous ring has just been completed, and then advances the offset cursor.
  */
void addNewPoint(IPolygonDictionary::Coord x, IPolygonDictionary::Coord y, Data & data, Offset & offset)
{
    const bool ring_completed = offset.atLastPointOfRing();

    if (ring_completed && offset.atLastRingOfPolygon())
    {
        data.addPolygon(offset.atLastPolygonOfMultiPolygon());
    }
    else if (ring_completed)
    {
        /** A new polygon already comes with its outer ring, so a ring started
          * in the middle of a polygon can only be an inner one.
          */
        data.dest.back().inners().emplace_back();
    }

    data.addPoint(x, y);
    ++offset;
}
|
|
|
|
|
|
|
|
/** Peels the three array levels (multi-polygons -> polygons -> rings) off the key column,
  * remembering the offsets of each level in `offset`.
  * Returns the innermost data column, i.e. the collection of points.
  * Throws TYPE_MISMATCH if any of the three levels is not an array column.
  */
const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset)
{
    const auto * multi_polygons = typeid_cast<const ColumnArray*>(column.get());
    if (multi_polygons == nullptr)
        throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH};
    offset.multi_polygon_offsets.assign(multi_polygons->getOffsets());

    const auto * polygons_level = typeid_cast<const ColumnArray*>(&multi_polygons->getData());
    if (polygons_level == nullptr)
        throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH};
    offset.polygon_offsets.assign(polygons_level->getOffsets());

    const auto * rings_level = typeid_cast<const ColumnArray*>(&polygons_level->getData());
    if (rings_level == nullptr)
        throw Exception{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH};
    offset.ring_offsets.assign(rings_level->getOffsets());

    return rings_level->getDataPtr().get();
}
|
|
|
|
|
|
|
|
/** Handles a key column where every row is a single ring (an array of points).
  * The row offsets are used as ring offsets; polygon and multi-polygon offsets are synthesized
  * as 1, 2, ..., n so that every ring forms its own polygon and every polygon its own multi-polygon.
  * Returns the data column of the array, i.e. the collection of points.
  * Throws TYPE_MISMATCH if the column is not an array column.
  */
const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset)
{
    const auto * ptr_polygons = typeid_cast<const ColumnArray*>(column.get());
    if (!ptr_polygons)
        throw Exception{"Expected a column containing arrays of points", ErrorCodes::TYPE_MISMATCH};
    offset.ring_offsets.assign(ptr_polygons->getOffsets());

    /// std::iota only fills an existing range, so the array must first get one element per ring.
    /// The copied values are placeholders that are overwritten right away.
    offset.polygon_offsets.assign(offset.ring_offsets);
    std::iota(offset.polygon_offsets.begin(), offset.polygon_offsets.end(), IColumn::Offset(1));
    offset.multi_polygon_offsets.assign(offset.polygon_offsets);

    return ptr_polygons->getDataPtr().get();
}
|
2020-01-14 14:40:34 +00:00
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
void handlePointsReprByArrays(const IColumn * column, Data & data, Offset & offset)
|
2020-01-27 17:08:29 +00:00
|
|
|
{
|
2020-04-22 07:03:43 +00:00
|
|
|
const auto * ptr_points = typeid_cast<const ColumnArray*>(column);
|
|
|
|
const auto * ptr_coord = typeid_cast<const ColumnVector<Float64>*>(&ptr_points->getData());
|
2020-01-14 14:40:34 +00:00
|
|
|
if (!ptr_coord)
|
2020-01-27 17:08:29 +00:00
|
|
|
throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH};
|
|
|
|
const auto & offsets = ptr_points->getOffsets();
|
2020-01-30 16:41:58 +00:00
|
|
|
IColumn::Offset prev_offset = 0;
|
2020-01-27 17:08:29 +00:00
|
|
|
for (size_t i = 0; i < offsets.size(); ++i)
|
|
|
|
{
|
2020-01-30 16:41:58 +00:00
|
|
|
if (offsets[i] - prev_offset != 2)
|
2020-01-27 17:08:29 +00:00
|
|
|
throw Exception{"All points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS};
|
2020-01-30 16:41:58 +00:00
|
|
|
prev_offset = offsets[i];
|
2020-02-05 16:38:03 +00:00
|
|
|
addNewPoint(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1), data, offset);
|
2020-01-27 17:08:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offset)
|
2020-01-27 17:08:29 +00:00
|
|
|
{
|
2020-04-22 07:03:43 +00:00
|
|
|
const auto * ptr_points = typeid_cast<const ColumnTuple*>(column);
|
2020-01-27 17:08:29 +00:00
|
|
|
if (!ptr_points)
|
|
|
|
throw Exception{"Expected a column of tuples representing points", ErrorCodes::TYPE_MISMATCH};
|
|
|
|
if (ptr_points->tupleSize() != 2)
|
|
|
|
throw Exception{"Points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS};
|
2020-04-22 07:03:43 +00:00
|
|
|
const auto * column_x = typeid_cast<const ColumnVector<Float64>*>(&ptr_points->getColumn(0));
|
|
|
|
const auto * column_y = typeid_cast<const ColumnVector<Float64>*>(&ptr_points->getColumn(1));
|
2020-01-27 17:08:29 +00:00
|
|
|
if (!column_x || !column_y)
|
|
|
|
throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH};
|
|
|
|
for (size_t i = 0; i < column_x->size(); ++i)
|
2020-01-14 14:40:34 +00:00
|
|
|
{
|
2020-02-05 16:38:03 +00:00
|
|
|
addNewPoint(column_x->getElement(i), column_y->getElement(i), data, offset);
|
2020-01-27 17:08:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-05-24 18:00:03 +00:00
|
|
|
void IPolygonDictionary::extractPolygons(const ColumnPtr & column)
|
2020-01-27 17:08:29 +00:00
|
|
|
{
|
|
|
|
Data data = {polygons, ids};
|
|
|
|
Offset offset;
|
|
|
|
|
2020-01-27 20:28:20 +00:00
|
|
|
const IColumn * points_collection = nullptr;
|
2020-01-27 17:08:29 +00:00
|
|
|
switch (input_type)
|
|
|
|
{
|
|
|
|
case InputType::MultiPolygon:
|
|
|
|
points_collection = unrollMultiPolygons(column, offset);
|
|
|
|
break;
|
|
|
|
case InputType::SimplePolygon:
|
|
|
|
points_collection = unrollSimplePolygons(column, offset);
|
|
|
|
break;
|
|
|
|
}
|
2020-01-14 14:40:34 +00:00
|
|
|
|
2020-02-05 16:38:03 +00:00
|
|
|
if (!offset.allRingsHaveAPositiveArea())
|
|
|
|
throw Exception{"Every ring included in a polygon or excluded from it should contain at least 3 points",
|
|
|
|
ErrorCodes::BAD_ARGUMENTS};
|
|
|
|
|
2020-01-27 17:08:29 +00:00
|
|
|
/** Adding the first empty polygon */
|
|
|
|
data.addPolygon(true);
|
|
|
|
|
|
|
|
switch (point_type)
|
|
|
|
{
|
|
|
|
case PointType::Array:
|
2020-02-05 16:38:03 +00:00
|
|
|
handlePointsReprByArrays(points_collection, data, offset);
|
2020-01-27 17:08:29 +00:00
|
|
|
break;
|
|
|
|
case PointType::Tuple:
|
2020-02-05 16:38:03 +00:00
|
|
|
handlePointsReprByTuples(points_collection, data, offset);
|
2020-01-27 17:08:29 +00:00
|
|
|
break;
|
|
|
|
}
|
2020-01-14 14:40:34 +00:00
|
|
|
}
|
2019-12-26 13:23:04 +00:00
|
|
|
|
2019-12-25 22:02:55 +00:00
|
|
|
}
|
|
|
|
|