2019-12-02 15:26:59 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <atomic>
|
|
|
|
#include <variant>
|
|
|
|
#include <Core/Block.h>
|
2019-12-23 13:23:11 +00:00
|
|
|
#include <Columns/ColumnDecimal.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
2019-12-02 15:26:59 +00:00
|
|
|
#include <Common/Arena.h>
|
|
|
|
#include <boost/geometry.hpp>
|
|
|
|
#include <boost/geometry/geometries/multi_polygon.hpp>
|
|
|
|
|
|
|
|
#include "DictionaryStructure.h"
|
|
|
|
#include "IDictionary.h"
|
|
|
|
#include "IDictionarySource.h"
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Shorthand for the Boost.Geometry namespace; the point and polygon type aliases in this header are spelled with it.
/// NOTE(review): the alias is declared at namespace scope in a header, so it is visible to every includer — confirm this is intended.
namespace bg = boost::geometry;
|
|
|
|
|
2019-12-25 17:32:02 +00:00
|
|
|
/** An interface for polygon dictionaries.
|
|
|
|
* Polygons are read and stored as multi_polygons from boost::geometry in Euclidean coordinates.
|
|
|
|
* An implementation should inherit from this base class and preprocess the data upon construction if needed.
|
|
|
|
* It must override the find method of this class which retrieves the polygon containing a single point.
|
|
|
|
*/
|
2019-12-02 15:26:59 +00:00
|
|
|
class IPolygonDictionary : public IDictionaryBase
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
IPolygonDictionary(
|
2020-01-14 14:40:34 +00:00
|
|
|
const std::string & database_,
|
2019-12-02 15:26:59 +00:00
|
|
|
const std::string & name_,
|
|
|
|
const DictionaryStructure & dict_struct_,
|
|
|
|
DictionarySourcePtr source_ptr_,
|
|
|
|
DictionaryLifetime dict_lifetime_);
|
|
|
|
|
2020-01-14 14:40:34 +00:00
|
|
|
const std::string & getDatabase() const override;
|
|
|
|
const std::string & getName() const override;
|
|
|
|
const std::string & getFullName() const override;
|
2019-12-02 15:26:59 +00:00
|
|
|
|
|
|
|
std::string getTypeName() const override;
|
|
|
|
|
2019-12-16 15:46:51 +00:00
|
|
|
std::string getKeyDescription() const;
|
|
|
|
|
2019-12-02 15:26:59 +00:00
|
|
|
size_t getBytesAllocated() const override;
|
|
|
|
|
|
|
|
size_t getQueryCount() const override;
|
|
|
|
|
|
|
|
double getHitRate() const override;
|
|
|
|
|
|
|
|
size_t getElementCount() const override;
|
|
|
|
|
|
|
|
double getLoadFactor() const override;
|
|
|
|
|
|
|
|
const IDictionarySource * getSource() const override;
|
|
|
|
|
|
|
|
const DictionaryStructure & getStructure() const override;
|
|
|
|
|
|
|
|
const DictionaryLifetime & getLifetime() const override;
|
|
|
|
|
|
|
|
bool isInjective(const std::string & attribute_name) const override;
|
|
|
|
|
|
|
|
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
|
|
|
|
2019-12-23 13:23:11 +00:00
|
|
|
template <typename T>
|
|
|
|
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Functions used to retrieve attributes of specific type by key. */
|
|
|
|
|
2019-12-23 13:23:11 +00:00
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void get##TYPE( \
|
2019-12-23 13:39:17 +00:00
|
|
|
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
|
2019-12-23 13:23:11 +00:00
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
2019-12-23 13:39:17 +00:00
|
|
|
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
|
2019-12-23 13:23:11 +00:00
|
|
|
|
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void get##TYPE( \
|
|
|
|
const std::string & attribute_name, \
|
|
|
|
const Columns & key_columns, \
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &, \
|
2019-12-23 13:23:11 +00:00
|
|
|
const PaddedPODArray<TYPE> & def, \
|
|
|
|
ResultArrayType<TYPE> & out) const;
|
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
2019-12-23 13:39:17 +00:00
|
|
|
void getString(
|
|
|
|
const std::string & attribute_name,
|
|
|
|
const Columns & key_columns,
|
|
|
|
const DataTypes &,
|
|
|
|
const ColumnString * const def,
|
|
|
|
ColumnString * const out) const;
|
2019-12-23 13:23:11 +00:00
|
|
|
|
|
|
|
#define DECLARE(TYPE) \
|
|
|
|
void get##TYPE( \
|
|
|
|
const std::string & attribute_name, \
|
|
|
|
const Columns & key_columns, \
|
2019-12-23 13:39:17 +00:00
|
|
|
const DataTypes &, \
|
2019-12-23 13:23:11 +00:00
|
|
|
const TYPE def, \
|
|
|
|
ResultArrayType<TYPE> & out) const;
|
|
|
|
DECLARE(UInt8)
|
|
|
|
DECLARE(UInt16)
|
|
|
|
DECLARE(UInt32)
|
|
|
|
DECLARE(UInt64)
|
|
|
|
DECLARE(UInt128)
|
|
|
|
DECLARE(Int8)
|
|
|
|
DECLARE(Int16)
|
|
|
|
DECLARE(Int32)
|
|
|
|
DECLARE(Int64)
|
|
|
|
DECLARE(Float32)
|
|
|
|
DECLARE(Float64)
|
|
|
|
DECLARE(Decimal32)
|
|
|
|
DECLARE(Decimal64)
|
|
|
|
DECLARE(Decimal128)
|
|
|
|
#undef DECLARE
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
void getString(
|
|
|
|
const std::string & attribute_name,
|
|
|
|
const Columns & key_columns,
|
|
|
|
const DataTypes & key_types,
|
|
|
|
const String & def,
|
|
|
|
ColumnString * const out) const;
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
|
|
|
|
* The check is performed for multiple points represented by columns of their x and y coordinates.
|
|
|
|
* The boolean result is written to out.
|
|
|
|
*/
|
|
|
|
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
|
2019-12-16 15:34:46 +00:00
|
|
|
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
2019-12-02 15:26:59 +00:00
|
|
|
|
|
|
|
protected:
|
2019-12-25 17:32:02 +00:00
|
|
|
/** A simple two-dimensional point in Euclidean coordinates. */
|
2019-12-02 15:26:59 +00:00
|
|
|
using Point = bg::model::point<Float64, 2, bg::cs::cartesian>;
|
2019-12-25 17:32:02 +00:00
|
|
|
/** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */
|
2019-12-02 15:26:59 +00:00
|
|
|
using Polygon = bg::model::polygon<Point>;
|
2019-12-25 17:32:02 +00:00
|
|
|
/** A multi_polygon in boost is a collection of polygons. */
|
2019-12-02 15:26:59 +00:00
|
|
|
using MultiPolygon = bg::model::multi_polygon<Polygon>;
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Returns true if the given point can be found in the polygon dictionary.
|
2019-12-25 17:32:02 +00:00
|
|
|
* If true id is set to the index of a polygon containing the given point.
|
2019-12-24 18:21:50 +00:00
|
|
|
* Overridden in different implementations of this interface.
|
|
|
|
*/
|
2019-12-02 15:26:59 +00:00
|
|
|
virtual bool find(const Point & point, size_t & id) const = 0;
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
std::vector<MultiPolygon> polygons;
|
|
|
|
|
2020-01-14 14:40:34 +00:00
|
|
|
const std::string database;
|
2019-12-16 15:24:26 +00:00
|
|
|
const std::string name;
|
2020-01-14 14:40:34 +00:00
|
|
|
const std::string full_name;
|
2019-12-16 15:24:26 +00:00
|
|
|
const DictionaryStructure dict_struct;
|
|
|
|
const DictionarySourcePtr source_ptr;
|
|
|
|
const DictionaryLifetime dict_lifetime;
|
|
|
|
|
2019-12-16 15:11:16 +00:00
|
|
|
private:
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Helper functions for loading the data from the configuration.
|
|
|
|
* The polygons serving as keys are extracted into boost types.
|
|
|
|
* All other values are stored in one column per attribute.
|
|
|
|
*/
|
2019-12-02 15:26:59 +00:00
|
|
|
void createAttributes();
|
|
|
|
void blockToAttributes(const Block & block);
|
|
|
|
void loadData();
|
|
|
|
|
|
|
|
void calculateBytesAllocated();
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Checks whether a given attribute exists and returns its index */
|
2019-12-23 13:23:11 +00:00
|
|
|
size_t getAttributeIndex(const std::string & attribute_name) const;
|
2019-12-24 18:21:50 +00:00
|
|
|
|
2019-12-27 13:06:03 +00:00
|
|
|
/** Helper functions to retrieve and instantiate the provided null value of an attribute.
|
|
|
|
* Since a null value is obligatory for every attribute they are simply appended to null_values defined below.
|
|
|
|
*/
|
2019-12-23 13:23:11 +00:00
|
|
|
template <typename T>
|
2019-12-26 15:21:49 +00:00
|
|
|
void appendNullValueImpl(const Field & null_value);
|
|
|
|
void appendNullValue(AttributeUnderlyingType type, const Field & value);
|
2019-12-23 13:23:11 +00:00
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Helper function for retrieving the value of an attribute by key. */
|
2019-12-23 13:23:11 +00:00
|
|
|
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
|
|
|
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
|
|
|
|
2019-12-27 13:06:03 +00:00
|
|
|
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
|
2019-12-24 18:21:50 +00:00
|
|
|
std::map<std::string, size_t> attribute_index_by_name;
|
2019-12-27 13:06:03 +00:00
|
|
|
/** A vector of columns storing the values of each attribute. */
|
2019-12-23 13:23:11 +00:00
|
|
|
Columns attributes;
|
2019-12-27 13:06:03 +00:00
|
|
|
/** A vector of null values corresponding to each attribute. */
|
2019-12-26 15:21:49 +00:00
|
|
|
std::vector<std::variant<
|
|
|
|
UInt8,
|
|
|
|
UInt16,
|
|
|
|
UInt32,
|
|
|
|
UInt64,
|
|
|
|
UInt128,
|
|
|
|
Int8,
|
|
|
|
Int16,
|
|
|
|
Int32,
|
|
|
|
Int64,
|
|
|
|
Decimal32,
|
|
|
|
Decimal64,
|
|
|
|
Decimal128,
|
|
|
|
Float32,
|
|
|
|
Float64,
|
|
|
|
String>> null_values;
|
2019-12-02 15:26:59 +00:00
|
|
|
|
|
|
|
size_t bytes_allocated = 0;
|
|
|
|
size_t element_count = 0;
|
|
|
|
mutable std::atomic<size_t> query_count{0};
|
2020-01-14 14:40:34 +00:00
|
|
|
|
|
|
|
static void extractMultiPolygons(const ColumnPtr & column, std::vector<MultiPolygon> & dest);
|
2019-12-02 15:26:59 +00:00
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** Extracts a list of points from two columns representing their x and y coordinates. */
|
2019-12-23 13:23:11 +00:00
|
|
|
static std::vector<Point> extractPoints(const Columns &key_columns);
|
2019-12-24 18:21:50 +00:00
|
|
|
|
|
|
|
/** Converts an array containing two Float64s to a point. */
|
2019-12-02 15:26:59 +00:00
|
|
|
static Point fieldToPoint(const Field & field);
|
2019-12-24 18:21:50 +00:00
|
|
|
|
|
|
|
/** Converts an array of arrays of points to a polygon. The first array represents the outer ring and zero or more
|
|
|
|
* following arrays represent the rings that are excluded from the polygon.
|
|
|
|
*/
|
2019-12-02 15:26:59 +00:00
|
|
|
static Polygon fieldToPolygon(const Field & field);
|
2019-12-24 18:21:50 +00:00
|
|
|
|
|
|
|
/** Converts an array of polygons (see above) to a multi-polygon. */
|
2019-12-02 15:26:59 +00:00
|
|
|
static MultiPolygon fieldToMultiPolygon(const Field & field);
|
|
|
|
|
2019-12-24 18:21:50 +00:00
|
|
|
/** The number of dimensions used. Change with great caution. */
|
2019-12-02 15:26:59 +00:00
|
|
|
static constexpr size_t DIM = 2;
|
|
|
|
};
|
|
|
|
|
2019-12-25 17:32:02 +00:00
|
|
|
/** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction.
|
2019-12-24 18:21:50 +00:00
|
|
|
* Iterates over all stored polygons for each query, checking each of them in linear time.
|
2019-12-25 17:32:02 +00:00
|
|
|
* Retrieves the first polygon in the dictionary containing a given point.
|
2019-12-24 18:21:50 +00:00
|
|
|
*/
|
2019-12-02 15:26:59 +00:00
|
|
|
class SimplePolygonDictionary : public IPolygonDictionary
|
|
|
|
{
|
2019-12-16 15:11:16 +00:00
|
|
|
public:
|
|
|
|
SimplePolygonDictionary(
|
|
|
|
const std::string & name_,
|
|
|
|
const DictionaryStructure & dict_struct_,
|
|
|
|
DictionarySourcePtr source_ptr_,
|
|
|
|
DictionaryLifetime dict_lifetime_);
|
2019-12-16 15:24:26 +00:00
|
|
|
|
|
|
|
std::shared_ptr<const IExternalLoadable> clone() const override;
|
|
|
|
|
2019-12-02 15:26:59 +00:00
|
|
|
private:
|
|
|
|
bool find(const Point & point, size_t & id) const override;
|
|
|
|
};
|
|
|
|
|
2019-12-25 22:02:55 +00:00
|
|
|
}
|
|
|
|
|