ClickHouse/dbms/src/Dictionaries/PolygonDictionary.cpp

467 lines
15 KiB
C++
Raw Normal View History

2019-12-02 15:26:59 +00:00
#include <ext/map.h>
#include "PolygonDictionary.h"
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
namespace DB
{
/// Error codes referenced by this file. They are only declared here (extern); the definitions live elsewhere.
/// ARGUMENT_OUT_OF_BOUND and UNKNOWN_TYPE are declared but not referenced in the visible part of this file.
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_TYPE;
extern const int UNSUPPORTED_METHOD;
}
/// Builds the dictionary eagerly: registers the attributes declared in the structure,
/// reads every block from the source and then accounts for the memory held by the attribute columns.
///
/// name_          - dictionary name (copied).
/// dict_struct_   - dictionary structure (copied).
/// source_ptr_    - source to load from; ownership is moved into the dictionary.
/// dict_lifetime_ - lifetime settings (copied).
///
/// Exceptions thrown by createAttributes() or loadData() propagate to the caller.
IPolygonDictionary::IPolygonDictionary(
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_)
    : name(name_)
    , dict_struct(dict_struct_)
    , source_ptr(std::move(source_ptr_))
    , dict_lifetime(dict_lifetime_)
{
    createAttributes();
    loadData();

    // Without this call bytes_allocated is never updated and getBytesAllocated() keeps reporting its initial value.
    // Attribute columns are only assigned once a block has been processed, so the accounting is skipped
    // when no rows were loaded rather than touching columns that were never set.
    if (element_count != 0)
        calculateBytesAllocated();
}
/// Returns a copy of the dictionary name that was passed to the constructor.
std::string IPolygonDictionary::getName() const
{
return name;
}
/// Returns the fixed type name "Polygon"; createAttributes() also uses it in its error message.
std::string IPolygonDictionary::getTypeName() const
{
return "Polygon";
}
2019-12-16 15:46:51 +00:00
/// Forwards to getKeyDescription() of the stored dictionary structure.
std::string IPolygonDictionary::getKeyDescription() const
{
return dict_struct.getKeyDescription();
}
2019-12-02 15:26:59 +00:00
/// Returns the bytes_allocated counter, which calculateBytesAllocated() accumulates from the attribute columns.
size_t IPolygonDictionary::getBytesAllocated() const
{
return bytes_allocated;
}
/// Returns the current value of the lookup counter (getItemsImpl() adds the number of looked-up points to it).
/// The value is read with relaxed memory ordering.
size_t IPolygonDictionary::getQueryCount() const
{
return query_count.load(std::memory_order_relaxed);
}
/// Always reports 1.0.
/// NOTE(review): presumably because the whole dictionary is loaded up front and there is no cache to miss - confirm.
double IPolygonDictionary::getHitRate() const
{
return 1.0;
}
/// Returns the number of rows loaded so far; blockToAttributes() adds block.rows() for every block it processes.
size_t IPolygonDictionary::getElementCount() const
{
return element_count;
}
/// Always reports 1.0.
/// NOTE(review): presumably a placeholder because no hash table is used here - confirm.
double IPolygonDictionary::getLoadFactor() const
{
return 1.0;
}
/// Returns a non-owning pointer to the source held by this dictionary.
const IDictionarySource * IPolygonDictionary::getSource() const
{
return source_ptr.get();
}
/// Returns a reference to the lifetime settings stored at construction.
const DictionaryLifetime & IPolygonDictionary::getLifetime() const
{
return dict_lifetime;
}
/// Returns a reference to the dictionary structure stored at construction.
const DictionaryStructure & IPolygonDictionary::getStructure() const
{
return dict_struct;
}
/// Always returns false; the string argument is ignored.
bool IPolygonDictionary::isInjective(const std::string &) const
{
return false;
}
/// Reading the dictionary contents back as a stream is not implemented:
/// both arguments are ignored and an UNSUPPORTED_METHOD exception is always thrown.
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const {
// TODO: Better error message.
throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD};
}
void IPolygonDictionary::createAttributes() {
2019-12-23 13:23:11 +00:00
attributes.resize(dict_struct.attributes.size());
2019-12-02 15:26:59 +00:00
for (size_t i = 0; i < dict_struct.attributes.size(); ++i)
{
attribute_index_by_name.emplace(dict_struct.attributes[i].name, i);
2019-12-16 15:11:16 +00:00
if (dict_struct.attributes[i].hierarchical)
2019-12-02 15:26:59 +00:00
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
}
}
2019-12-24 18:21:50 +00:00
/// Appends one source block to the dictionary.
/// Column 0 of the block is read as the key (converted to a multi-polygon per row);
/// column i + 1 is appended to the i-th attribute column.
void IPolygonDictionary::blockToAttributes(const DB::Block & block)
{
    const auto row_count = block.rows();
    element_count += row_count;

    for (size_t attr = 0; attr < attributes.size(); ++attr)
    {
        const auto & incoming = block.safeGetByPosition(attr + 1);
        auto & stored = attributes[attr];

        // First block: simply keep the block's column.
        if (!stored)
        {
            stored = incoming.column;
            continue;
        }

        // Later blocks: obtain a mutable version of what is stored and append the new rows.
        MutableColumnPtr merged = std::move(*stored).mutate();
        merged->insertRangeFrom(*incoming.column, 0, incoming.column->size());
        stored = std::move(merged);
    }

    polygons.reserve(polygons.size() + row_count);
    const auto & key_column = block.safeGetByPosition(0).column;
    // TODO: Read the key column directly instead of materialising a Field per row.
    for (size_t row = 0; row < row_count; ++row)
        polygons.push_back(fieldToMultiPolygon((*key_column)[row]));
}
2019-12-24 18:21:50 +00:00
void IPolygonDictionary::loadData()
{
2019-12-02 15:26:59 +00:00
auto stream = source_ptr->loadAll();
stream->readPrefix();
2019-12-24 18:21:50 +00:00
while (const auto block = stream->read())
2019-12-02 15:26:59 +00:00
blockToAttributes(block);
stream->readSuffix();
}
void IPolygonDictionary::calculateBytesAllocated()
{
2019-12-23 13:43:12 +00:00
// TODO:: Account for key.
for (const auto & column : attributes)
bytes_allocated += column->allocatedBytes();
2019-12-02 15:26:59 +00:00
}
2019-12-23 13:23:11 +00:00
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns &key_columns)
{
if (key_columns.size() != DIM)
throw Exception{"Expected " + std::to_string(DIM) + " columns of coordinates", ErrorCodes::BAD_ARGUMENTS};
const auto column_x = typeid_cast<const ColumnVector<Float64>*>(key_columns[0].get());
const auto column_y = typeid_cast<const ColumnVector<Float64>*>(key_columns[1].get());
if (!column_x || !column_y)
throw Exception{"Expected columns of Float64", ErrorCodes::TYPE_MISMATCH};
2019-12-02 15:26:59 +00:00
const auto rows = key_columns.front()->size();
2019-12-23 13:23:11 +00:00
std::vector<Point> result;
result.reserve(rows);
2019-12-02 15:26:59 +00:00
for (const auto row : ext::range(0, rows))
2019-12-23 13:23:11 +00:00
result.emplace_back(column_x->getElement(row), column_y->getElement(row));
return result;
}
void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, PaddedPODArray<UInt8> &out) const {
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
2019-12-02 15:26:59 +00:00
{
// TODO: Check whether this will be optimized by the compiler.
2019-12-23 13:23:11 +00:00
size_t trash = 0;
2019-12-02 15:26:59 +00:00
out[row] = find(pt, trash);
2019-12-23 13:23:11 +00:00
++row;
}
}
2019-12-24 18:21:50 +00:00
/// Returns the position registered for `attribute_name` by createAttributes().
/// Throws BAD_ARGUMENTS when no attribute with that name is known.
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);
    if (found != attribute_index_by_name.end())
        return found->second;

    throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS};
}
template <typename T>
2019-12-23 13:39:17 +00:00
T IPolygonDictionary::getNullValue(const DB::Field &field)
2019-12-23 13:23:11 +00:00
{
return field.get<NearestFieldType<T>>();
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
2019-12-23 13:41:35 +00:00
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
2019-12-23 13:23:11 +00:00
{ \
const auto ind = getAttributeIndex(attribute_name); \
2019-12-23 13:40:42 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
2019-12-23 13:23:11 +00:00
\
2019-12-23 13:39:17 +00:00
const auto null_value = getNullValue<TYPE>(dict_struct.attributes[ind].null_value); \
2019-12-23 13:23:11 +00:00
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
2019-12-23 13:41:35 +00:00
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
2019-12-23 13:23:11 +00:00
{
const auto ind = getAttributeIndex(attribute_name);
2019-12-23 13:39:17 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
2019-12-23 13:23:11 +00:00
2019-12-25 15:27:04 +00:00
const auto & null_value = getNullValue<String>(dict_struct.attributes[ind].null_value);
2019-12-23 13:23:11 +00:00
2019-12-25 15:27:04 +00:00
getItemsImpl<String, String>(
2019-12-23 13:23:11 +00:00
ind,
key_columns,
2019-12-25 15:27:04 +00:00
[&](const size_t, const String value) { out->insertData(value.data(), value.size()); },
2019-12-23 13:23:11 +00:00
[&](const size_t) { return null_value; });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
2019-12-23 13:39:17 +00:00
const DataTypes &, \
2019-12-23 13:23:11 +00:00
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
2019-12-23 13:39:17 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
2019-12-23 13:23:11 +00:00
\
getItemsImpl<TYPE, TYPE>( \
ind, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
2019-12-23 13:39:17 +00:00
const DataTypes &,
2019-12-23 13:23:11 +00:00
const ColumnString * const def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
2019-12-23 13:39:17 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
2019-12-23 13:23:11 +00:00
getItemsImpl<StringRef, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE) \
void IPolygonDictionary::get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
2019-12-23 13:39:17 +00:00
const DataTypes &, \
2019-12-23 13:23:11 +00:00
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto ind = getAttributeIndex(attribute_name); \
2019-12-23 13:39:17 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
2019-12-23 13:23:11 +00:00
\
getItemsImpl<TYPE, TYPE>( \
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void IPolygonDictionary::getString(
const std::string & attribute_name,
const Columns & key_columns,
2019-12-23 13:39:17 +00:00
const DataTypes &,
2019-12-23 13:23:11 +00:00
const String & def,
ColumnString * const out) const
{
const auto ind = getAttributeIndex(attribute_name);
2019-12-23 13:39:17 +00:00
checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
2019-12-23 13:23:11 +00:00
getItemsImpl<StringRef, StringRef>(
ind,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; });
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void IPolygonDictionary::getItemsImpl(
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto points = extractPoints(key_columns);
for (const auto i : ext::range(0, points.size()))
{
size_t id = 0;
auto found = find(points[i], id);
set_value(i, found ? static_cast<OutputType>((*attributes[attribute_ind])[id].get<AttributeType>()) : get_default(i));
2019-12-02 15:26:59 +00:00
}
2019-12-23 13:23:11 +00:00
query_count.fetch_add(points.size(), std::memory_order_relaxed);
2019-12-02 15:26:59 +00:00
}
/// Converts a Field holding an array of DIM Float64 coordinates into a Point.
/// Throws TYPE_MISMATCH if the field is not an array or a coordinate is not Float64,
/// and LOGICAL_ERROR if the array does not have exactly DIM elements.
IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field & field)
{
    if (field.getType() != Field::Types::Array)
        throw Exception{"Point is not represented by an array", ErrorCodes::TYPE_MISMATCH};

    // Bound by reference: fieldToPolygon() calls this once per vertex, so copying the array every time is wasted work.
    const auto & coordinate_array = field.get<Array>();
    if (coordinate_array.size() != DIM)
        throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR};

    Float64 values[DIM];
    for (size_t i = 0; i < DIM; ++i)
    {
        if (coordinate_array[i].getType() != Field::Types::Float64)
            throw Exception{"Coordinates should be Float64", ErrorCodes::TYPE_MISMATCH};
        values[i] = coordinate_array[i].get<Float64>();
    }

    // NOTE(review): only the first two coordinates are used, i.e. this assumes DIM == 2 - confirm.
    return {values[0], values[1]};
}
2019-12-24 18:21:50 +00:00
/// Converts a Field holding an array of rings into a Polygon.
/// The first ring is the outer boundary, every following ring becomes an inner ring; each ring is an array of points.
/// Throws TYPE_MISMATCH if the field or any ring is not an array and LOGICAL_ERROR if there are no rings at all.
/// The finished polygon is passed through bg::correct() before it is returned.
IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & field)
{
    if (field.getType() != Field::Types::Array)
        throw Exception{"Polygon is not represented by an array", ErrorCodes::TYPE_MISMATCH};

    const auto & rings = field.get<Array>();
    if (rings.empty())
        throw Exception{"Empty polygons are not allowed", ErrorCodes::LOGICAL_ERROR};

    // Appends every point of `ring_field` to `ring`, rejecting a ring that is not an array.
    // Captures by [&] so that the call to fieldToPoint() is valid whether or not it needs the enclosing object.
    const auto fill_ring = [&](auto & ring, const Field & ring_field, const char * not_an_array_message)
    {
        if (ring_field.getType() != Field::Types::Array)
            throw Exception{not_an_array_message, ErrorCodes::TYPE_MISMATCH};
        for (const auto & vertex : ring_field.get<Array>())
            bg::append(ring, fieldToPoint(vertex));
    };

    Polygon polygon;
    auto & holes = polygon.inners();
    holes.resize(rings.size() - 1);

    fill_ring(polygon.outer(), rings[0], "Outer polygon ring is not represented by an array");
    for (size_t hole = 0; hole < holes.size(); ++hole)
        fill_ring(holes[hole], rings[hole + 1], "Inner polygon ring is not represented by an array");

    bg::correct(polygon);
    return polygon;
}
2019-12-24 18:21:50 +00:00
// TODO: Do this more efficiently by casting to the corresponding Column and avoiding Fields.
/// Converts a Field holding an array of polygons into a MultiPolygon, converting each element with fieldToPolygon().
/// Throws TYPE_MISMATCH if the field is not an array; errors from fieldToPolygon() propagate.
IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const Field & field)
{
    if (field.getType() != Field::Types::Array)
        throw Exception{"MultiPolygon is not represented by an array", ErrorCodes::TYPE_MISMATCH};

    const auto & polygon_fields = field.get<Array>();

    MultiPolygon multi_polygon;
    multi_polygon.reserve(polygon_fields.size());
    for (const auto & polygon_field : polygon_fields)
        multi_polygon.push_back(fieldToPolygon(polygon_field));

    return multi_polygon;
}
2019-12-16 15:11:16 +00:00
/// Forwards all arguments to the IPolygonDictionary constructor, which performs the loading;
/// no additional state is set up here.
SimplePolygonDictionary::SimplePolygonDictionary(
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_)
    : IPolygonDictionary(name_, dict_struct_, std::move(source_ptr_), dict_lifetime_)
{
}
2019-12-16 15:24:26 +00:00
std::shared_ptr<const IExternalLoadable> SimplePolygonDictionary::clone() const
{
return std::make_shared<SimplePolygonDictionary>(
this->name,
this->dict_struct,
this->source_ptr->clone(),
this->dict_lifetime);
}
2019-12-02 15:26:59 +00:00
bool SimplePolygonDictionary::find(const Point &point, size_t & id) const
{
for (size_t i = 0; i < (this->polygons).size(); ++i)
{
2019-12-24 18:21:50 +00:00
if (bg::covered_by(point, (this->polygons)[i])) {
2019-12-02 15:26:59 +00:00
id = i;
return true;
}
}
return false;
}
/// Registers the "polygon" layout in the dictionary factory.
/// The registered creator rejects structures that define range_min / range_max (BAD_ARGUMENTS),
/// reads the lifetime from "<config_prefix>.lifetime" and returns a SimplePolygonDictionary.
void registerDictionaryPolygon(DictionaryFactory & factory)
{
    auto make_polygon_dictionary = [](const std::string & name,
                                      const DictionaryStructure & dict_struct,
                                      const Poco::Util::AbstractConfiguration & config,
                                      const std::string & config_prefix,
                                      DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
        // TODO: Check that there is only one key and it is of the correct type.
        const bool has_range_bounds = dict_struct.range_min || dict_struct.range_max;
        if (has_range_bounds)
            throw Exception{name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};

        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
        return std::make_unique<SimplePolygonDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime);
    };

    // NOTE(review): the trailing `true` presumably marks the layout as complex-key - confirm against registerLayout.
    factory.registerLayout("polygon", make_polygon_dictionary, true);
}
}