Merge pull request #9278 from achulkov2/polygon-dict-grids

Polygon dictionaries with grids
This commit is contained in:
Anton Popov 2020-07-29 21:32:35 +03:00 committed by GitHub
commit cf505a92ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 3314 additions and 477 deletions

View File

@ -0,0 +1,86 @@
# Cловари полигонов {#slovari-polygonov}
Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов.
Для примера: определение района города по географическим координатам.
Пример конфигурации:
``` xml
<dictionary>
<structure>
<key>
<name>key</name>
<type>Array(Array(Array(Array(Float64))))</type>
</key>
<attribute>
<name>name</name>
<type>String</type>
<null_value></null_value>
</attribute>
<attribute>
<name>value</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
<layout>
<polygon />
</layout>
</dictionary>
```
Соответствующий [DDL-запрос](../../../sql-reference/statements/create.md#create-dictionary-query):
``` sql
CREATE DICTIONARY polygon_dict_name (
key Array(Array(Array(Array(Float64)))),
name String,
value UInt64
)
PRIMARY KEY key
LAYOUT(POLYGON())
...
```
При конфигурации словаря полигонов ключ должен иметь один из двух типов:
- Простой полигон. Представляет из себя массив точек.
- Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона,
последующие элементы могут задавать дырки, вырезаемые из него.
Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживаются только двумерные точки.
Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах.
Доступно 3 типа [хранения данных в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md):
- POLYGON_SIMPLE. Это наивная реализация, в которой на каждый запрос делается линейный проход по всем полигонам, и для каждого проверяется принадлежность без использования дополнительных индексов.
- POLYGON_INDEX_EACH. Для каждого полигона строится отдельный индекс, который позволяет быстро проверять принадлежность в большинстве случаев (оптимизирован под географические регионы).
Также на рассматриваемую область накладывается сетка, которая значительно сужает количество рассматриваемых полигонов.
Сетка строится рекурсивным делением ячейки на 16 равных частей и конфигурируется двумя параметрами.
Деление прекращается при достижении глубины рекурсии MAX_DEPTH или в тот момент, когда ячейку пересекают не более MIN_INTERSECTIONS полигонов.
Для ответа на запрос находится соответствующая ячейка, и происходит поочередное обращение к индексу для сохранённых в ней полигонов.
- POLYGON_INDEX_CELL. В этом размещении также строится сетка, описанная выше. Доступны такие же параметры. Для каждой ячейки-листа строится индекс на всех попадающих в неё кусках полигонов, который позволяет быстро отвечать на запрос.
- POLYGON. Синоним к POLYGON_INDEX_CELL.
Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями.
Важным отличием является то, что здесь ключами будут являться точки, для которых хочется найти содержащий их полигон.
Пример работы со словарем, определенным выше:
``` sql
CREATE TABLE points (
x Float64,
y Float64
)
...
SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
```
В результате исполнения последней команды для каждой точки в таблице `points` будет найден полигон минимальной площади, содержащий данную точку, и выведены запрошенные атрибуты.

View File

@ -1,12 +1,11 @@
#include <ext/map.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include "PolygonDictionary.h"
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <numeric>
namespace DB
@ -191,7 +190,7 @@ void IPolygonDictionary::createAttributes()
}
}
void IPolygonDictionary::blockToAttributes(const DB::Block &block)
void IPolygonDictionary::blockToAttributes(const DB::Block & block)
{
const auto rows = block.rows();
element_count += rows;
@ -222,8 +221,31 @@ void IPolygonDictionary::loadData()
blockToAttributes(block);
stream->readSuffix();
for (auto & polygon : polygons)
std::vector<double> areas;
areas.reserve(polygons.size());
std::vector<std::pair<Polygon, size_t>> polygon_ids;
polygon_ids.reserve(polygons.size());
for (size_t i = 0; i < polygons.size(); ++i)
{
auto & polygon = polygons[i];
bg::correct(polygon);
areas.push_back(bg::area(polygon));
polygon_ids.emplace_back(polygon, i);
}
sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs)
{
return areas[lhs.second] < areas[rhs.second];
});
std::vector<size_t> correct_ids;
correct_ids.reserve(polygon_ids.size());
for (size_t i = 0; i < polygon_ids.size(); ++i)
{
auto & polygon = polygon_ids[i];
correct_ids.emplace_back(ids[polygon.second]);
polygons[i] = polygon.first;
}
ids = correct_ids;
}
void IPolygonDictionary::calculateBytesAllocated()
@ -233,7 +255,7 @@ void IPolygonDictionary::calculateBytesAllocated()
bytes_allocated += column->allocatedBytes();
}
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns &key_columns)
std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const Columns & key_columns)
{
if (key_columns.size() != 2)
throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS};
@ -249,7 +271,7 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
return result;
}
void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, PaddedPODArray<UInt8> &out) const
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
{
size_t row = 0;
for (const auto & pt : extractPoints(key_columns))
@ -505,7 +527,7 @@ struct Data
ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon));
}
void addPoint(Float64 x, Float64 y)
void addPoint(IPolygonDictionary::Coord x, IPolygonDictionary::Coord y)
{
auto & last_polygon = dest.back();
auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back());
@ -513,7 +535,7 @@ struct Data
}
};
void addNewPoint(Float64 x, Float64 y, Data & data, Offset & offset)
void addNewPoint(IPolygonDictionary::Coord x, IPolygonDictionary::Coord y, Data & data, Offset & offset)
{
if (offset.atLastPointOfRing())
{
@ -600,7 +622,7 @@ void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offs
}
void IPolygonDictionary::extractPolygons(const ColumnPtr &column)
void IPolygonDictionary::extractPolygons(const ColumnPtr & column)
{
Data data = {polygons, ids};
Offset offset;
@ -634,114 +656,5 @@ void IPolygonDictionary::extractPolygons(const ColumnPtr &column)
}
}
/// Forwards every argument to the IPolygonDictionary base; this implementation builds no extra structures.
SimplePolygonDictionary::SimplePolygonDictionary(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    InputType input_type_,
    PointType point_type_)
    : IPolygonDictionary(
        database_,
        name_,
        dict_struct_,
        std::move(source_ptr_),
        dict_lifetime_,
        input_type_,
        point_type_)
{
}
std::shared_ptr<const IExternalLoadable> SimplePolygonDictionary::clone() const
{
return std::make_shared<SimplePolygonDictionary>(
this->database,
this->name,
this->dict_struct,
this->source_ptr->clone(),
this->dict_lifetime,
this->input_type,
this->point_type);
}
bool SimplePolygonDictionary::find(const Point &point, size_t & id) const
{
bool found = false;
double area = 0;
for (size_t i = 0; i < (this->polygons).size(); ++i)
{
if (bg::covered_by(point, (this->polygons)[i]))
{
double new_area = bg::area((this->polygons)[i]);
if (!found || new_area < area)
{
found = true;
id = i;
area = new_area;
}
}
}
return found;
}
/** Registers the 'polygon' layout in the dictionary factory.
  * The registered callback validates the dictionary structure, deduces how polygons and points
  * are encoded from the key type and constructs a SimplePolygonDictionary.
  * Throws BAD_ARGUMENTS if the key is missing, is not a single attribute, has an unsupported type,
  * or if range_min/range_max are specified.
  */
void registerDictionaryPolygon(DictionaryFactory & factory)
{
auto create_layout = [=](const std::string &,
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr) -> DictionaryPtr
{
/// The database is optional (defaults to an empty string), the name has no default.
const String database = config.getString(config_prefix + ".database", "");
const String name = config.getString(config_prefix + ".name");
if (!dict_struct.key)
throw Exception{"'key' is required for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS};
if (dict_struct.key->size() != 1)
throw Exception{"The 'key' should consist of a single attribute for a dictionary of layout 'polygon'",
ErrorCodes::BAD_ARGUMENTS};
IPolygonDictionary::InputType input_type;
IPolygonDictionary::PointType point_type;
const auto key_type = (*dict_struct.key)[0].type;
const auto f64 = std::make_shared<DataTypeFloat64>();
/// The four accepted key types: multi-polygon or simple polygon, with points given as arrays or as tuples of two Float64.
const auto multi_polygon_array = DataTypeArray(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(f64))));
const auto multi_polygon_tuple = DataTypeArray(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(std::vector<DataTypePtr>{f64, f64}))));
const auto simple_polygon_array = DataTypeArray(std::make_shared<DataTypeArray>(f64));
const auto simple_polygon_tuple = DataTypeArray(std::make_shared<DataTypeTuple>(std::vector<DataTypePtr>{f64, f64}));
/// Both enums are assigned in every branch that does not throw.
if (key_type->equals(multi_polygon_array))
{
input_type = IPolygonDictionary::InputType::MultiPolygon;
point_type = IPolygonDictionary::PointType::Array;
}
else if (key_type->equals(multi_polygon_tuple))
{
input_type = IPolygonDictionary::InputType::MultiPolygon;
point_type = IPolygonDictionary::PointType::Tuple;
}
else if (key_type->equals(simple_polygon_array))
{
input_type = IPolygonDictionary::InputType::SimplePolygon;
point_type = IPolygonDictionary::PointType::Array;
}
else if (key_type->equals(simple_polygon_tuple))
{
input_type = IPolygonDictionary::InputType::SimplePolygon;
point_type = IPolygonDictionary::PointType::Tuple;
}
else
throw Exception{"The key type " + key_type->getName() +
" is not one of the following allowed types for a dictionary of layout 'polygon': " +
multi_polygon_array.getName() + " " +
multi_polygon_tuple.getName() + " " +
simple_polygon_array.getName() + " " +
simple_polygon_tuple.getName() + " ",
ErrorCodes::BAD_ARGUMENTS};
/// Range bounds are rejected for this layout.
if (dict_struct.range_min || dict_struct.range_max)
throw Exception{name
+ ": elements range_min and range_max should be defined only "
"for a dictionary of layout 'range_hashed'",
ErrorCodes::BAD_ARGUMENTS};
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
return std::make_unique<SimplePolygonDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, input_type, point_type);
};
factory.registerLayout("polygon", create_layout, true);
}
}

View File

@ -19,10 +19,10 @@ namespace DB
namespace bg = boost::geometry;
/** An interface for polygon dictionaries.
* Polygons are read and stored as multi_polygons from boost::geometry in Euclidean coordinates.
* An implementation should inherit from this base class and preprocess the data upon construction if needed.
* It must override the find method of this class which retrieves the polygon containing a single point.
*/
* Polygons are read and stored as multi_polygons from boost::geometry in Euclidean coordinates.
* An implementation should inherit from this base class and preprocess the data upon construction if needed.
* It must override the find method of this class which retrieves the polygon containing a single point.
*/
class IPolygonDictionary : public IDictionaryBase
{
public:
@ -41,8 +41,8 @@ public:
SimplePolygon
};
/** Controls the different types allowed for providing the coordinates of points.
* Right now a point can be represented by either an array or a tuple of two Float64 values.
*/
* Right now a point can be represented by either an array or a tuple of two Float64 values.
*/
enum class PointType
{
Array,
@ -178,10 +178,14 @@ public:
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
/** Single coordinate type. */
using Coord = Float32;
/** A two-dimensional point in Euclidean coordinates. */
using Point = bg::model::point<Float64, 2, bg::cs::cartesian>;
using Point = bg::model::d2::point_xy<Coord, bg::cs::cartesian>;
/** A polygon in boost is an outer ring of points with zero or more cut out inner rings. */
using Polygon = bg::model::polygon<Point>;
/** A ring in boost used for describing the polygons. */
using Ring = bg::model::ring<Point>;
protected:
/** Returns true if the given point can be found in the polygon dictionary.
@ -266,28 +270,5 @@ private:
static std::vector<Point> extractPoints(const Columns &key_columns);
};
/** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction.
  * Iterates over all stored polygons for each query, checking each of them in linear time.
  * Retrieves the polygon with the smallest area containing the given point.
  * If several such polygons have the same smallest area, any of them may be returned.
  */
class SimplePolygonDictionary : public IPolygonDictionary
{
public:
/// All arguments are forwarded to the IPolygonDictionary constructor.
SimplePolygonDictionary(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_);
/// Returns a new dictionary created with the same settings and a clone of the source.
std::shared_ptr<const IExternalLoadable> clone() const override;
private:
/// Writes the index of the matching polygon into id and returns true, or returns false if no polygon contains the point.
bool find(const Point & point, size_t & id) const override;
};
}

View File

@ -0,0 +1,255 @@
#include "PolygonDictionaryImplementations.h"
#include "DictionaryFactory.h"
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <common/logger_useful.h>
#include <numeric>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Forwards every argument to the IPolygonDictionary base; no additional index is built.
PolygonDictionarySimple::PolygonDictionarySimple(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    InputType input_type_,
    PointType point_type_)
    : IPolygonDictionary(
        database_,
        name_,
        dict_struct_,
        std::move(source_ptr_),
        dict_lifetime_,
        input_type_,
        point_type_)
{
}
std::shared_ptr<const IExternalLoadable> PolygonDictionarySimple::clone() const
{
return std::make_shared<PolygonDictionarySimple>(
this->database,
this->name,
this->dict_struct,
this->source_ptr->clone(),
this->dict_lifetime,
this->input_type,
this->point_type);
}
/** Linear scan: reports the index of the first stored polygon that covers the point.
  * Returns false and leaves `id` untouched when no polygon covers it.
  */
bool PolygonDictionarySimple::find(const Point & point, size_t & id) const
{
    for (size_t idx = 0; idx < polygons.size(); ++idx)
    {
        if (!bg::covered_by(point, polygons[idx]))
            continue;

        id = idx;
        return true;
    }
    return false;
}
/** Builds the grid over all polygons and, in addition, one SlabsPolygonIndex per polygon
  * (buckets[i] indexes polygons[i] alone).
  */
PolygonDictionaryIndexEach::PolygonDictionaryIndexEach(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    InputType input_type_,
    PointType point_type_,
    int min_intersections_,
    int max_depth_)
    : IPolygonDictionary(
        database_,
        name_,
        dict_struct_,
        std::move(source_ptr_),
        dict_lifetime_,
        input_type_,
        point_type_)
    , grid(min_intersections_, max_depth_, polygons)
    , min_intersections(min_intersections_)
    , max_depth(max_depth_)
{
    buckets.reserve(polygons.size());
    for (const auto & polygon : polygons)
    {
        /// Each bucket is an index over a one-element polygon list.
        const std::vector<Polygon> only_this(1, polygon);
        buckets.emplace_back(only_this);
    }
}
std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexEach::clone() const
{
return std::make_shared<PolygonDictionaryIndexEach>(
this->database,
this->name,
this->dict_struct,
this->source_ptr->clone(),
this->dict_lifetime,
this->input_type,
this->point_type,
this->min_intersections,
this->max_depth);
}
/** Looks up the grid cell for the point and probes the per-polygon indexes of the cell's
  * candidates in the order they are stored. If none of them contains the point, falls back
  * to the cell's `first_covered` polygon when one is set.
  */
bool PolygonDictionaryIndexEach::find(const Point & point, size_t & id) const
{
    const auto * cell = grid.find(point.x(), point.y());
    if (!cell)
        return false;

    for (const auto & candidate : cell->polygon_ids)
    {
        /// The bucket holds a single polygon, so the id it reports is of no interest.
        size_t unused;
        if (!buckets[candidate].find(point, unused))
            continue;

        id = candidate;
        return true;
    }

    if (cell->first_covered == FinalCell::kNone)
        return false;

    id = cell->first_covered;
    return true;
}
/// Loads the polygons through the base class and builds the grid index over them.
PolygonDictionaryIndexCell::PolygonDictionaryIndexCell(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    InputType input_type_,
    PointType point_type_,
    size_t min_intersections_,
    size_t max_depth_)
    : IPolygonDictionary(
        database_,
        name_,
        dict_struct_,
        std::move(source_ptr_),
        dict_lifetime_,
        input_type_,
        point_type_)
    , index(min_intersections_, max_depth_, polygons)
    , min_intersections(min_intersections_)
    , max_depth(max_depth_)
{
}
std::shared_ptr<const IExternalLoadable> PolygonDictionaryIndexCell::clone() const
{
return std::make_shared<PolygonDictionaryIndexCell>(
this->database,
this->name,
this->dict_struct,
this->source_ptr->clone(),
this->dict_lifetime,
this->input_type,
this->point_type,
this->min_intersections,
this->max_depth);
}
/** Looks up the grid cell for the point and queries the cell's own index.
  * The cell index reports a position inside the cell, which is translated to a polygon id
  * through `corresponding_ids`. If the index has no match, falls back to the cell's
  * `first_covered` polygon when one is set.
  */
bool PolygonDictionaryIndexCell::find(const Point & point, size_t & id) const
{
    const auto * cell = index.find(point.x(), point.y());
    if (!cell)
        return false;

    const bool has_indexed_pieces = !(cell->corresponding_ids).empty();
    if (has_indexed_pieces && cell->index.find(point, id))
    {
        id = cell->corresponding_ids[id];
        return true;
    }

    if (cell->first_covered == FinalCellWithSlabs::kNone)
        return false;

    id = cell->first_covered;
    return true;
}
/** Factory callback shared by all polygon dictionary layouts.
  *
  * Validates the dictionary structure, deduces from the key type whether the source provides
  * multi-polygons or simple polygons and whether points are arrays or tuples of two Float64,
  * and constructs a dictionary of type PolygonDictionary.
  *
  * For the grid-based implementations (PolygonDictionaryIndexEach, PolygonDictionaryIndexCell)
  * the optional settings `max_depth` and `min_intersections` are read from the layout element;
  * the defaults declared in the dictionary class are used when they are absent.
  *
  * Throws BAD_ARGUMENTS if the key is missing, is not a single attribute, has an unsupported type,
  * if range_min/range_max are specified, or if the layout section has no child element.
  */
template <class PolygonDictionary>
DictionaryPtr createLayout(const std::string & ,
                           const DictionaryStructure & dict_struct,
                           const Poco::Util::AbstractConfiguration & config,
                           const std::string & config_prefix,
                           DictionarySourcePtr source_ptr)
{
    /// The database is optional (defaults to an empty string), the name has no default.
    const String database = config.getString(config_prefix + ".database", "");
    const String name = config.getString(config_prefix + ".name");

    if (!dict_struct.key)
        throw Exception{"'key' is required for a polygon dictionary", ErrorCodes::BAD_ARGUMENTS};
    if (dict_struct.key->size() != 1)
        throw Exception{"The 'key' should consist of a single attribute for a polygon dictionary",
                        ErrorCodes::BAD_ARGUMENTS};

    IPolygonDictionary::InputType input_type;
    IPolygonDictionary::PointType point_type;
    const auto key_type = (*dict_struct.key)[0].type;
    const auto f64 = std::make_shared<DataTypeFloat64>();

    /// The four accepted key types.
    const auto multi_polygon_array = DataTypeArray(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(f64))));
    const auto multi_polygon_tuple = DataTypeArray(std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(std::vector<DataTypePtr>{f64, f64}))));
    const auto simple_polygon_array = DataTypeArray(std::make_shared<DataTypeArray>(f64));
    const auto simple_polygon_tuple = DataTypeArray(std::make_shared<DataTypeTuple>(std::vector<DataTypePtr>{f64, f64}));

    /// Both enums are assigned in every branch that does not throw.
    if (key_type->equals(multi_polygon_array))
    {
        input_type = IPolygonDictionary::InputType::MultiPolygon;
        point_type = IPolygonDictionary::PointType::Array;
    }
    else if (key_type->equals(multi_polygon_tuple))
    {
        input_type = IPolygonDictionary::InputType::MultiPolygon;
        point_type = IPolygonDictionary::PointType::Tuple;
    }
    else if (key_type->equals(simple_polygon_array))
    {
        input_type = IPolygonDictionary::InputType::SimplePolygon;
        point_type = IPolygonDictionary::PointType::Array;
    }
    else if (key_type->equals(simple_polygon_tuple))
    {
        input_type = IPolygonDictionary::InputType::SimplePolygon;
        point_type = IPolygonDictionary::PointType::Tuple;
    }
    else
        throw Exception{"The key type " + key_type->getName() +
                        " is not one of the following allowed types for a polygon dictionary: " +
                        multi_polygon_array.getName() + " " +
                        multi_polygon_tuple.getName() + " " +
                        simple_polygon_array.getName() + " " +
                        simple_polygon_tuple.getName() + " ",
                        ErrorCodes::BAD_ARGUMENTS};

    if (dict_struct.range_min || dict_struct.range_max)
        throw Exception{name
                        + ": elements range_min and range_max should be defined only "
                        "for a dictionary of layout 'range_hashed'",
                        ErrorCodes::BAD_ARGUMENTS};

    const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};

    if constexpr (std::is_same_v<PolygonDictionary, PolygonDictionaryIndexEach> || std::is_same_v<PolygonDictionary, PolygonDictionaryIndexCell>)
    {
        const auto layout_prefix = config_prefix + ".layout";
        Poco::Util::AbstractConfiguration::Keys keys;
        config.keys(layout_prefix, keys);

        /// Calling front() on an empty container is undefined behaviour, so a layout section
        /// without a child element is reported as a configuration error instead.
        if (keys.empty())
            throw Exception{name + ": the layout section of a polygon dictionary must contain the layout type element",
                            ErrorCodes::BAD_ARGUMENTS};

        /// The grid settings live under the first child of the layout section.
        const auto dict_prefix = layout_prefix + "." + keys.front();
        size_t max_depth = config.getUInt(dict_prefix + ".max_depth", PolygonDictionary::kMaxDepthDefault);
        size_t min_intersections = config.getUInt(dict_prefix + ".min_intersections", PolygonDictionary::kMinIntersectionsDefault);
        return std::make_unique<PolygonDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, input_type, point_type, min_intersections, max_depth);
    }
    else
        return std::make_unique<PolygonDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, input_type, point_type);
}
/** Registers every polygon dictionary layout in the factory.
  * Each layout name is bound to the createLayout instantiation of the matching implementation.
  */
void registerDictionaryPolygon(DictionaryFactory & factory)
{
    factory.registerLayout("polygon_simple", createLayout<PolygonDictionarySimple>, true);
    factory.registerLayout("polygon_index_each", createLayout<PolygonDictionaryIndexEach>, true);
    factory.registerLayout("polygon_index_cell", createLayout<PolygonDictionaryIndexCell>, true);

    /// "polygon" is an alias for polygon_index_cell, the most performant dictionary type.
    factory.registerLayout("polygon", createLayout<PolygonDictionaryIndexCell>, true);
}
}

View File

@ -0,0 +1,99 @@
#pragma once
#include "PolygonDictionary.h"
#include "PolygonDictionaryUtils.h"
#include <vector>
namespace DB
{
/** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction.
  * Iterates over the stored polygons for each query, checking each of them in linear time,
  * and returns the first one that contains the given point.
  * NOTE(review): the result is the polygon with the smallest area only if the base class keeps
  * `polygons` ordered by increasing area — confirm against IPolygonDictionary::loadData.
  * If several such polygons have the same area, any of them may be returned.
  */
class PolygonDictionarySimple : public IPolygonDictionary
{
public:
/// All arguments are forwarded to the IPolygonDictionary constructor.
PolygonDictionarySimple(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_);
/// Returns a new dictionary created with the same settings and a clone of the source.
std::shared_ptr<const IExternalLoadable> clone() const override;
private:
/// Writes the index of the matching polygon into id and returns true, or returns false if no polygon contains the point.
bool find(const Point & point, size_t & id) const override;
};
/** A polygon dictionary which generates a recursive grid in order to efficiently cut the number
  * of polygons to be checked for a given point.
  * For more detail see the GridRoot and FinalCell classes.
  * Separately, a slab index is built for each individual polygon. This allows to check the
  * candidates more efficiently.
  */
class PolygonDictionaryIndexEach : public IPolygonDictionary
{
public:
/** min_intersections_ and max_depth_ configure the grid; the remaining arguments are forwarded
  * to the IPolygonDictionary constructor.
  * NOTE(review): these two parameters are declared as int while the members and the defaults
  * below are size_t (PolygonDictionaryIndexCell takes size_t) — consider unifying the types.
  */
PolygonDictionaryIndexEach(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_,
int min_intersections_,
int max_depth_);
/// Returns a new dictionary created with the same settings and a clone of the source.
std::shared_ptr<const IExternalLoadable> clone() const override;
/// Defaults for the grid settings, used when they are not specified in the configuration.
static constexpr size_t kMinIntersectionsDefault = 1;
static constexpr size_t kMaxDepthDefault = 5;
private:
/// Writes the id of the matching polygon into id and returns true, or returns false if nothing was found.
bool find(const Point & point, size_t & id) const override;
/// One slab index per polygon; buckets[i] is queried for the candidate with id i.
std::vector<SlabsPolygonIndex> buckets;
/// The grid used to narrow down the candidates for a point.
GridRoot<FinalCell> grid;
/// Grid settings, stored so that clone() can pass them to the copy.
const size_t min_intersections;
const size_t max_depth;
};
/** A polygon dictionary which builds the same kind of recursive grid, but with FinalCellWithSlabs cells:
  * every final cell holds its own SlabsPolygonIndex, so a query is answered by a single index lookup
  * inside the cell found for the point.
  */
class PolygonDictionaryIndexCell : public IPolygonDictionary
{
public:
/** min_intersections_ and max_depth_ configure the grid; the remaining arguments are forwarded
  * to the IPolygonDictionary constructor.
  */
PolygonDictionaryIndexCell(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_,
size_t min_intersections_,
size_t max_depth_);
/// Returns a new dictionary created with the same settings and a clone of the source.
std::shared_ptr<const IExternalLoadable> clone() const override;
/// Defaults for the grid settings, used when they are not specified in the configuration.
static constexpr size_t kMinIntersectionsDefault = 1;
static constexpr size_t kMaxDepthDefault = 5;
private:
/// Writes the id of the matching polygon into id and returns true, or returns false if nothing was found.
bool find(const Point & point, size_t & id) const override;
/// The grid whose final cells carry their own slab indexes.
GridRoot<FinalCellWithSlabs> index;
/// Grid settings, stored so that clone() can pass them to the copy.
const size_t min_intersections;
const size_t max_depth;
};
}

View File

@ -0,0 +1,320 @@
#include "PolygonDictionaryUtils.h"
#include <Common/ThreadPool.h>
#include <common/logger_useful.h>
#include <algorithm>
#include <thread>
#include <numeric>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Stores the ids of the polygons related to this cell.
  * When is_last_covered_ is set, the last id is moved out of the candidate list into `first_covered`.
  * The polygon list and the cell box are not needed by this cell type.
  */
FinalCell::FinalCell(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> &, const Box &, bool is_last_covered_)
    : polygon_ids(polygon_ids_)
{
    if (!is_last_covered_)
        return;

    first_covered = polygon_ids.back();
    polygon_ids.pop_back();
}
/// A final cell is the end of the lookup: it returns itself regardless of the coordinates.
const FinalCell * FinalCell::find(Coord, Coord) const
{
    return this;
}
/// Adds val to both coordinates of the point, in place.
inline void shift(Point & point, Coord val)
{
    const auto moved_x = point.x() + val;
    const auto moved_y = point.y() + val;
    point.x(moved_x);
    point.y(moved_y);
}
/** Builds the slab index of a final cell.
  * Every candidate polygon is intersected with the cell box (enlarged by kEps on each side) and
  * the resulting pieces are indexed together. corresponding_ids[k] is the id of the polygon
  * that the k-th indexed piece was cut from.
  * When is_last_covered_ is set, the last id is stored in `first_covered` and is not indexed.
  */
FinalCellWithSlabs::FinalCellWithSlabs(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> & polygons_, const Box & box_, bool is_last_covered_)
{
/// Enlarge the box by kEps in every direction before intersecting.
auto extended = box_;
shift(extended.min_corner(), -GridRoot<FinalCellWithSlabs>::kEps);
shift(extended.max_corner(), GridRoot<FinalCellWithSlabs>::kEps);
Polygon tmp_poly;
bg::convert(extended, tmp_poly);
std::vector<Polygon> intersections;
if (is_last_covered_)
first_covered = polygon_ids_.back();
/// is_last_covered_ converts to 0 or 1, so the last id is skipped when it has been taken as first_covered.
for (size_t i = 0; i + is_last_covered_ < polygon_ids_.size(); ++i)
{
std::vector<Polygon> intersection;
bg::intersection(tmp_poly, polygons_[polygon_ids_[i]], intersection);
for (auto & polygon : intersection)
intersections.emplace_back(std::move(polygon));
/// One intersection may produce several pieces (or none); all pieces added above belong to the same polygon.
while (corresponding_ids.size() < intersections.size())
corresponding_ids.push_back(polygon_ids_[i]);
}
/// The index is left default-constructed when nothing intersects the cell.
if (!intersections.empty())
index = SlabsPolygonIndex{intersections};
}
/// A final cell is the end of the lookup: it returns itself regardless of the coordinates.
const FinalCellWithSlabs * FinalCellWithSlabs::find(Coord, Coord) const
{
    return this;
}
/// Collects the distinct x coordinates of all vertices and then builds the edge index over the polygons.
SlabsPolygonIndex::SlabsPolygonIndex(const std::vector<Polygon> & polygons)
    : log(&Poco::Logger::get("SlabsPolygonIndex"))
    , sorted_x(uniqueX(polygons))
{
    indexBuild(polygons);
}
std::vector<Coord> SlabsPolygonIndex::uniqueX(const std::vector<Polygon> & polygons)
{
std::vector<Coord> all_x;
for (const auto & poly : polygons)
{
for (const auto & point : poly.outer())
all_x.push_back(point.x());
for (const auto & inner : poly.inners())
for (const auto & point : inner)
all_x.push_back(point.x());
}
/** Making all_x sorted and distinct */
std::sort(all_x.begin(), all_x.end());
all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end());
LOG_TRACE(log, "Found {} unique x coordinates", all_x.size());
return all_x;
}
/** Builds the index over the edges of the given polygons.
  * 1. Collects the non-vertical edges of all rings; the polygon id of an edge is the position
  *    of its polygon in `polygons`.
  * 2. Sweeps over the slabs between consecutive values of sorted_x to find, for every edge,
  *    the slab where it starts (edge_left) and the slab boundary where it ends (edge_right).
  * 3. Adds every edge to the segment tree over slabs on the range [edge_left, edge_right).
  * Throws LOGICAL_ERROR if the range found for an edge does not match its endpoints.
  */
void SlabsPolygonIndex::indexBuild(const std::vector<Polygon> & polygons)
{
for (size_t i = 0; i < polygons.size(); ++i)
{
indexAddRing(polygons[i].outer(), i);
for (const auto & inner : polygons[i].inners())
indexAddRing(inner, i);
}
/** Sorting edges of (left_point, right_point, polygon_id) in that order */
std::sort(all_edges.begin(), all_edges.end(), Edge::compareByLeftPoint);
/** After sorting, the id of an edge is its position in all_edges */
for (size_t i = 0; i != all_edges.size(); ++i)
all_edges[i].edge_id = i;
/** Total number of edges */
size_t m = all_edges.size();
LOG_TRACE(log, "Just sorted {} edges from all {} polygons", all_edges.size(), polygons.size());
/** Using custom comparator for fetching edges in right_point order, like in scanline */
auto cmp = [](const Edge & a, const Edge & b)
{
return Edge::compareByRightPoint(a, b);
};
std::set<Edge, decltype(cmp)> interesting_edges(cmp);
/** Size of index: the number of slabs, i.e. the number of different x coordinates minus one */
size_t n = 0;
if (!sorted_x.empty())
{
n = sorted_x.size() - 1;
}
/** The tree has its leaves at positions [n, 2n); position 0 is not used */
edges_index_tree.resize(2 * n);
/** Map of interesting edge ids to the index of left x, the index of right x.
  * The initial value n means "not set": edges still active after the sweep end at the last x.
  */
std::vector<size_t> edge_left(m, n), edge_right(m, n);
size_t total_index_edges = 0;
size_t edges_it = 0;
/** Sweep over the slabs [sorted_x[l], sorted_x[r]) from left to right */
for (size_t l = 0, r = 1; r < sorted_x.size(); ++l, ++r)
{
const Coord lx = sorted_x[l];
const Coord rx = sorted_x[r];
/** Removing edges where right_point.x <= lx */
while (!interesting_edges.empty() && interesting_edges.begin()->r.x() <= lx)
{
edge_right[interesting_edges.begin()->edge_id] = l;
interesting_edges.erase(interesting_edges.begin());
}
/** Adding edges where left_point.x < rx */
for (; edges_it < all_edges.size() && all_edges[edges_it].l.x() < rx; ++edges_it)
{
interesting_edges.insert(all_edges[edges_it]);
edge_left[all_edges[edges_it].edge_id] = l;
}
}
for (size_t i = 0; i != all_edges.size(); i++)
{
size_t l = edge_left[i];
size_t r = edge_right[i];
/** Sanity check: the slab range found by the sweep must match the endpoints of the edge */
if (l == n || sorted_x[l] != all_edges[i].l.x() || sorted_x[r] != all_edges[i].r.x())
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Error occured while building polygon index. Edge {} is [{}, {}] but found [{}, {}]. l = {}, r = {}",
i, all_edges[i].l.x(), all_edges[i].r.x(), sorted_x[l], sorted_x[r], l, r);
}
/** Adding [l, r) to the segment tree */
for (l += n, r += n; l < r; l >>= 1, r >>= 1)
{
if (l & 1)
{
edges_index_tree[l++].emplace_back(all_edges[i]);
++total_index_edges;
}
if (r & 1)
{
edges_index_tree[--r].emplace_back(all_edges[i]);
++total_index_edges;
}
}
}
LOG_TRACE(log, "Polygon index is built, total_index_edges = {}", total_index_edges);
}
void SlabsPolygonIndex::indexAddRing(const Ring & ring, size_t polygon_id)
{
for (size_t i = 0, prev = ring.size() - 1; i < ring.size(); prev = i, ++i)
{
Point a = ring[prev];
Point b = ring[i];
/** Making a.x <= b.x */
if (a.x() > b.x())
std::swap(a, b);
if (a.x() == b.x() && a.y() > b.y())
std::swap(a, b);
if (a.x() == b.x())
{
/** Vertical edge found, skipping for now */
continue;
}
all_edges.emplace_back(a, b, polygon_id, 0);
}
}
/** Creates an edge from l_ to r_ that belongs to the polygon polygon_id_ and precomputes
  * the coefficients of its line equation y = k * x + b.
  * The endpoints must have different x coordinates, since the slope divides by their difference.
  */
SlabsPolygonIndex::Edge::Edge(
const Point & l_,
const Point & r_,
size_t polygon_id_,
size_t edge_id_)
: l(l_),
r(r_),
polygon_id(polygon_id_),
edge_id(edge_id_)
{
/** Calculating arguments of line equation.
* Original equation of this edge is:
* f(x) = l.y() + (r.y() - l.y()) / (r.x() - l.x()) * (x - l.x())
*/
k = (r.y() - l.y()) / (r.x() - l.x());
b = l.y() - k * l.x();
}
/** Ordering of edges by (left point, right point, polygon id).
  * Points are compared by x first and by y second.
  */
bool SlabsPolygonIndex::Edge::compareByLeftPoint(const Edge & a, const Edge & b)
{
    const auto & a_left = a.l;
    const auto & b_left = b.l;
    const auto & a_right = a.r;
    const auto & b_right = b.r;

    /// Left endpoint is the primary key.
    if (a_left.x() != b_left.x())
        return a_left.x() < b_left.x();
    if (a_left.y() != b_left.y())
        return a_left.y() < b_left.y();

    /// Right endpoint is the secondary key.
    if (a_right.x() != b_right.x())
        return a_right.x() < b_right.x();
    if (a_right.y() != b_right.y())
        return a_right.y() < b_right.y();

    return a.polygon_id < b.polygon_id;
}
/** Ordering of edges by (right point, left point, polygon id, edge id).
  * Points are compared by x first and by y second.
  */
bool SlabsPolygonIndex::Edge::compareByRightPoint(const Edge & a, const Edge & b)
{
    const auto & a_left = a.l;
    const auto & b_left = b.l;
    const auto & a_right = a.r;
    const auto & b_right = b.r;

    /// Right endpoint is the primary key.
    if (a_right.x() != b_right.x())
        return a_right.x() < b_right.x();
    if (a_right.y() != b_right.y())
        return a_right.y() < b_right.y();

    /// Left endpoint is the secondary key.
    if (a_left.x() != b_left.x())
        return a_left.x() < b_left.x();
    if (a_left.y() != b_left.y())
        return a_left.y() < b_left.y();

    /// Ids break the remaining ties.
    if (a.polygon_id != b.polygon_id)
        return a.polygon_id < b.polygon_id;
    return a.edge_id < b.edge_id;
}
/** Finds the indexed polygon with the smallest id that contains the point.
  * On success writes that id into `id` and returns true; otherwise returns false and leaves `id` untouched.
  */
bool SlabsPolygonIndex::find(const Point & point, size_t & id) const
{
    /** Vertical line or nothing at all, no match here */
    if (sorted_x.size() < 2)
        return false;

    Coord x = point.x();
    Coord y = point.y();

    /** Not in bounding box */
    if (x < sorted_x[0] || x > sorted_x.back())
        return false;

    /** Point is considered inside when a ray going down from the point crosses an odd number of edges.
      * This vector collects the polygon id of every crossed edge. The smallest id with an odd
      * number of occurrences is the answer.
      */
    std::vector<size_t> crossed_ids;
    crossed_ids.reserve(10);

    /** Binary search by sorted_x for the slab containing x */
    const auto slab_it = std::upper_bound(sorted_x.begin() + 1, sorted_x.end() - 1, x);
    size_t node = slab_it - sorted_x.begin() - 1;

    /** Jump to the leaf of that slab in the segment tree */
    node += edges_index_tree.size() / 2;

    /** Walk from the leaf up to the root, looking at the edges stored in every node on the way */
    do
    {
        for (const auto & edge : edges_index_tree[node])
        {
            /** The edge is crossed if the point lies on it or above it */
            if (x * edge.k + edge.b <= y)
                crossed_ids.emplace_back(edge.polygon_id);
        }
        node >>= 1;
    } while (node != 0);

    /** Equal ids become adjacent after sorting; scan them in pairs until a pair is broken */
    std::sort(crossed_ids.begin(), crossed_ids.end());
    for (size_t i = 0; i < crossed_ids.size(); i += 2)
    {
        const bool is_last = (i + 1 == crossed_ids.size());
        if (is_last || crossed_ids[i] != crossed_ids[i + 1])
        {
            id = crossed_ids[i];
            return true;
        }
    }
    return false;
}
}

View File

@ -0,0 +1,291 @@
#pragma once
#include <Core/Types.h>
#include <Common/ThreadPool.h>
#include <Poco/Logger.h>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/box.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include "PolygonDictionary.h"
#include <numeric>
namespace DB
{
namespace bg = boost::geometry;
using Coord = IPolygonDictionary::Coord;
using Point = IPolygonDictionary::Point;
using Polygon = IPolygonDictionary::Polygon;
using Ring = IPolygonDictionary::Ring;
using Box = bg::model::box<IPolygonDictionary::Point>;
/** SlabsPolygonIndex builds an index based on shooting a ray down from the query point.
  * When this ray crosses an odd number of edges of a single polygon, the point is considered inside it.
  *
  * SlabsPolygonIndex divides the plane into vertical slabs, separated by vertical lines going through all points.
  * For each slab, all edges falling into that slab are effectively stored.
  * For each find query, the required slab is found with binary search, and the result is computed
  * by iterating over all edges in that slab.
  */
class SlabsPolygonIndex
{
public:
    /** Creates an empty index. */
    SlabsPolygonIndex() = default;

    /** Builds an index by splitting all edges with all points x coordinates. */
    SlabsPolygonIndex(const std::vector<Polygon> & polygons);

    /** Finds polygon id the same way as IPolygonIndex.
      * Returns true and stores the id of the found polygon in `id`; returns false otherwise.
      */
    bool find(const Point & point, size_t & id) const;

    /** Edge describes an edge (two adjacent points) of any polygon, and contains the polygon's id.
      * Invariant here is that the first point has x not greater than the second point.
      */
    struct Edge
    {
        /// Left and right end points.
        Point l;
        Point r;
        /// Id of the polygon this edge belongs to.
        size_t polygon_id;
        size_t edge_id;

        /// Coefficients of the line through the end points: f(x) = k * x + b.
        Coord k;
        Coord b;

        Edge(const Point & l, const Point & r, size_t polygon_id, size_t edge_id);

        /// Orderings used for sorting edges, primarily by the left or by the right end point.
        static bool compareByLeftPoint(const Edge & a, const Edge & b);
        static bool compareByRightPoint(const Edge & a, const Edge & b);
    };

    /** EdgeLine is an optimized version of Edge: it keeps only the line coefficients and the polygon id. */
    struct EdgeLine
    {
        explicit EdgeLine(const Edge & e): k(e.k), b(e.b), polygon_id(e.polygon_id) {}
        Coord k;
        Coord b;
        size_t polygon_id;
    };

private:
    /** Returns unique x coordinates among all points */
    std::vector<Coord> uniqueX(const std::vector<Polygon> & polygons);

    /** Builds index described above */
    void indexBuild(const std::vector<Polygon> & polygons);

    /** Auxiliary function for adding ring to the index */
    void indexAddRing(const Ring & ring, size_t polygon_id);

    /// Initialised to null so that a default-constructed index never holds (or copies) an indeterminate pointer.
    Poco::Logger * log = nullptr;

    /** Sorted distinct coordinates of all vertexes */
    std::vector<Coord> sorted_x;
    std::vector<Edge> all_edges;

    /** This edges_index_tree stores all slabs with edges efficiently, using segment tree algorithm.
      * edges_index_tree[i] node combines segments from edges_index_tree[i*2] and edges_index_tree[i*2+1].
      * Every polygon's edge covers a segment of x coordinates, and can be added to this tree by
      * placing it into O(log n) nodes of this tree.
      */
    std::vector<std::vector<EdgeLine>> edges_index_tree;
};
/** Common interface of all grid cells.
  * ReturnCell is the leaf cell type that a lookup resolves to.
  */
template <class ReturnCell>
class ICell
{
public:
virtual ~ICell() = default;
/// Returns the leaf cell responsible for the point (x, y); the result must not be ignored.
/// The coordinate system of (x, y) is defined by the implementing cell.
[[nodiscard]] virtual const ReturnCell * find(Coord x, Coord y) const = 0;
};
/** This leaf cell implementation simply stores the indexes of the intersections.
  * As an additional optimization, if a polygon covers the cell completely its index is stored in
  * the first_covered field and all following polygon indexes are discarded,
  * since they won't ever be useful.
  */
class FinalCell : public ICell<FinalCell>
{
public:
/// The polygon vector and the box are accepted but unnamed in this declaration;
/// the definition lives outside of this header.
explicit FinalCell(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> &, const Box &, bool is_last_covered_);
/// Indexes of the polygons intersecting this cell.
std::vector<size_t> polygon_ids;
/// Index of the polygon covering the whole cell, or kNone when there is no such polygon.
size_t first_covered = kNone;
/// Sentinel: -1 converted to size_t, i.e. the largest representable value.
static constexpr size_t kNone = -1;
private:
/// Private on purpose: reachable only through the ICell interface.
[[nodiscard]] const FinalCell * find(Coord x, Coord y) const override;
};
/** This leaf cell implementation intersects the given polygons with the cell's box and builds a
  * slab index for the result.
  * Since the intersections can produce multiple polygons, a vector of corresponding ids is stored.
  * If the slab index returned the id x for a query, the correct polygon id is corresponding_ids[x].
  * As an additional optimization, if a polygon covers the cell completely its index is stored in the
  * first_covered field and all following polygons are not used for building the slab index.
  */
class FinalCellWithSlabs : public ICell<FinalCellWithSlabs>
{
public:
explicit FinalCellWithSlabs(const std::vector<size_t> & polygon_ids_, const std::vector<Polygon> & polygons_, const Box & box_, bool is_last_covered_);
/// Slab index built over the polygons clipped to this cell.
SlabsPolygonIndex index;
/// Maps an id returned by `index` back to the original polygon id.
std::vector<size_t> corresponding_ids;
/// Index of the polygon covering the whole cell, or kNone when there is no such polygon.
size_t first_covered = kNone;
/// Sentinel: -1 converted to size_t, i.e. the largest representable value.
static constexpr size_t kNone = -1;
private:
/// Private on purpose: reachable only through the ICell interface.
[[nodiscard]] const FinalCellWithSlabs * find(Coord x, Coord y) const override;
};
/** An inner grid cell that owns kSplit * kSplit children and forwards every query to one of them.
  * Children are stored so that the child in column x_bin and row y_bin lives at index
  * y_bin + x_bin * kSplit.
  */
template <class ReturnCell>
class DividedCell : public ICell<ReturnCell>
{
public:
    explicit DividedCell(std::vector<std::unique_ptr<ICell<ReturnCell>>> children_): children(std::move(children_)) {}

    /** Forwards the query to the child containing the point.
      * x and y are expected to be normalized to this cell, i.e. to lie in [0, 1).
      * The coordinates passed to the child are re-normalized to the child's own extent.
      */
    [[nodiscard]] const ReturnCell * find(Coord x, Coord y) const override
    {
        auto x_ratio = x * kSplit;
        auto y_ratio = y * kSplit;
        auto x_bin = binOf(x_ratio);
        auto y_bin = binOf(y_ratio);
        return children[y_bin + x_bin * kSplit]->find(x_ratio - x_bin, y_ratio - y_bin);
    }

    /** When a cell is split every side is split into kSplit pieces producing kSplit * kSplit equal smaller cells. */
    static constexpr size_t kSplit = 4;

private:
    /** Converts a scaled coordinate to a child index along one axis.
      * For values inside [0, kSplit) this is plain truncation. Anything else (NaN, negative values,
      * or a value that floating point rounding pushed up to kSplit) is clamped to the nearest valid
      * bin, so the float-to-int conversion is always defined and `children` is never indexed out of bounds.
      */
    template <typename Ratio>
    static int binOf(Ratio ratio)
    {
        /// Negated comparison so that NaN lands here too.
        if (!(ratio > 0))
            return 0;
        if (ratio >= static_cast<Ratio>(kSplit))
            return static_cast<int>(kSplit) - 1;
        return static_cast<int>(ratio);
    }

    std::vector<std::unique_ptr<ICell<ReturnCell>>> children;
};
/** A recursively built grid containing information about polygons intersecting each cell.
  * The starting cell is the bounding box of the given polygons which are passed by reference.
  * The reference is stored, so the polygons must outlive the grid.
  * For every cell a vector of indices of intersecting polygons is calculated, in the order originally provided upon
  * construction. A cell is recursively split into kSplit * kSplit equal cells up to the point where the cell
  * intersects a small enough number of polygons or the maximum allowed depth is exceeded.
  * Both of these parameters are set in the constructor.
  * Once these conditions are fulfilled some index is built and stored in the leaf cells.
  * The ReturnCell class passed in the template parameter is responsible for this.
  */
template <class ReturnCell>
class GridRoot : public ICell<ReturnCell>
{
public:
    GridRoot(size_t min_intersections_, size_t max_depth_, const std::vector<Polygon> & polygons_):
        kMinIntersections(min_intersections_), kMaxDepth(max_depth_), polygons(polygons_)
    {
        setBoundingBox();
        /// Initially every polygon is a candidate for the root cell.
        std::vector<size_t> order(polygons.size());
        std::iota(order.begin(), order.end(), 0);
        root = makeCell(min_x, min_y, max_x, max_y, order);
    }

    /** Retrieves the cell containing a given point.
      * A null pointer is returned when the point falls outside the grid.
      * A point with a NaN coordinate is treated as being outside the grid.
      */
    [[nodiscard]] const ReturnCell * find(Coord x, Coord y) const override
    {
        /// Written as negated "inside" checks: every comparison with NaN is false, so NaN is rejected
        /// here instead of reaching the float-to-integer conversions further down.
        if (!(x >= min_x && x < max_x))
            return nullptr;
        if (!(y >= min_y && y < max_y))
            return nullptr;
        /// Children work in coordinates normalized to the bounding box.
        return root->find((x - min_x) / (max_x - min_x), (y - min_y) / (max_y - min_y));
    }

    /** Until this depth is reached each row of cells is calculated concurrently in a new thread. */
    static constexpr size_t kMultiProcessingDepth = 2;

    /** A constant used to avoid errors with points falling on the boundaries of cells. */
    static constexpr Coord kEps = 1e-4;

private:
    std::unique_ptr<ICell<ReturnCell>> root = nullptr;
    /// Bounding box of all polygons; the upper bounds are enlarged by kEps.
    Coord min_x = 0, min_y = 0;
    Coord max_x = 0, max_y = 0;
    const size_t kMinIntersections;
    const size_t kMaxDepth;

    const std::vector<Polygon> & polygons;

    /** Builds the cell covering the given box.
      * possible_ids holds the candidate polygons inherited from the parent cell (taken by value, filtered locally).
      * Returns a leaf ReturnCell when few enough polygons remain or the depth limit is hit,
      * and a DividedCell with recursively built children otherwise.
      */
    std::unique_ptr<ICell<ReturnCell>> makeCell(Coord current_min_x, Coord current_min_y, Coord current_max_x, Coord current_max_y, std::vector<size_t> possible_ids, size_t depth = 0)
    {
        auto current_box = Box(Point(current_min_x, current_min_y), Point(current_max_x, current_max_y));
        Polygon tmp_poly;
        bg::convert(current_box, tmp_poly);
        /// Keep only the polygons that intersect this cell.
        possible_ids.erase(std::remove_if(possible_ids.begin(), possible_ids.end(), [&](const auto id)
        {
            return !bg::intersects(current_box, polygons[id]);
        }), possible_ids.end());
        int covered = 0;
#ifndef __clang_analyzer__ /// Triggers a warning in boost geometry.
        /// The first polygon covering the whole cell makes all later candidates irrelevant.
        auto it = std::find_if(possible_ids.begin(), possible_ids.end(), [&](const auto id)
        {
            return bg::covered_by(tmp_poly, polygons[id]);
        });
        if (it != possible_ids.end())
        {
            possible_ids.erase(it + 1, possible_ids.end());
            covered = 1;
        }
#endif
        /// The covering polygon itself is not counted as an intersection.
        size_t intersections = possible_ids.size() - covered;
        /// Note: depth is incremented here, so the children below are built with depth + 1.
        if (intersections <= kMinIntersections || depth++ == kMaxDepth)
            return std::make_unique<ReturnCell>(possible_ids, polygons, current_box, covered);
        auto x_shift = (current_max_x - current_min_x) / DividedCell<ReturnCell>::kSplit;
        auto y_shift = (current_max_y - current_min_y) / DividedCell<ReturnCell>::kSplit;
        std::vector<std::unique_ptr<ICell<ReturnCell>>> children;
        children.resize(DividedCell<ReturnCell>::kSplit * DividedCell<ReturnCell>::kSplit);
        std::vector<ThreadFromGlobalPool> threads{};
        for (size_t i = 0; i < DividedCell<ReturnCell>::kSplit; current_min_x += x_shift, ++i)
        {
            /// Builds the kSplit children sharing the same x range; each invocation writes to distinct slots of children.
            auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i](Coord x, Coord y)
            {
                for (size_t j = 0; j < DividedCell<ReturnCell>::kSplit; y += y_shift, ++j)
                {
                    children[i * DividedCell<ReturnCell>::kSplit + j] = makeCell(x, y, x + x_shift, y + y_shift, possible_ids, depth);
                }
            };
            if (depth <= kMultiProcessingDepth)
                threads.emplace_back(handle_row, current_min_x, current_min_y);
            else
                handle_row(current_min_x, current_min_y);
        }
        /// All rows must be finished before children is moved into the result.
        for (auto & thread : threads)
            thread.join();
        return std::make_unique<DividedCell<ReturnCell>>(std::move(children));
    }

    /** Computes the bounding box of all points of all polygons.
      * The upper bounds are then shifted by kEps.
      * With no points at all the box stays at [0, kEps) x [0, kEps).
      */
    void setBoundingBox()
    {
        bool first = true;
        std::for_each(polygons.begin(), polygons.end(), [&](const auto & polygon)
        {
            bg::for_each_point(polygon, [&](const Point & point)
            {
                auto x = point.x();
                auto y = point.y();
                if (first || x < min_x)
                    min_x = x;
                if (first || x > max_x)
                    max_x = x;
                if (first || y < min_y)
                    min_y = y;
                if (first || y > max_y)
                    max_y = y;
                if (first)
                    first = false;
            });
        });
        max_x += kEps;
        max_y += kEps;
    }
};
}

View File

@ -56,6 +56,8 @@ SRCS(
MongoDBDictionarySource.cpp
MySQLDictionarySource.cpp
PolygonDictionary.cpp
PolygonDictionaryUtils.cpp
PolygonDictionaryImplementations.cpp
RangeHashedDictionary.cpp
readInvalidateQuery.cpp
RedisBlockInputStream.cpp
@ -65,7 +67,6 @@ SRCS(
SSDComplexKeyCacheDictionary.cpp
writeParenthesisedString.cpp
XDBCDictionarySource.cpp
)
END()

View File

@ -38,7 +38,7 @@
#include <Dictionaries/ComplexKeyDirectDictionary.h>
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/TrieDictionary.h>
#include <Dictionaries/PolygonDictionary.h>
#include <Dictionaries/PolygonDictionaryImplementations.h>
#include <Dictionaries/DirectDictionary.h>
#include <ext/range.h>
@ -194,7 +194,9 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict))
!executeDispatchComplex<PolygonDictionarySimple>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexEach>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexCell>(block, arguments, result, dict))
throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -350,7 +352,9 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionarySimple>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexEach>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexCell>(block, arguments, result, dict) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict))
throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -534,7 +538,9 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict))
!executeDispatchComplex<PolygonDictionarySimple>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexEach>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexCell>(block, arguments, result, dict))
throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -874,7 +880,9 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionarySimple>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexEach>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexCell>(block, arguments, result, dict) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict))
throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -1135,7 +1143,9 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict))
!executeDispatchComplex<PolygonDictionarySimple>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexEach>(block, arguments, result, dict) &&
!executeDispatchComplex<PolygonDictionaryIndexCell>(block, arguments, result, dict))
throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}

View File

@ -1,142 +0,0 @@
dictGet test_01037.dict_array (-100,-42) qqq 101
dictGet test_01037.dict_array (-1,0) Click South 423
dictGet test_01037.dict_array (-0.1,0) Click South 423
dictGet test_01037.dict_array (0,-2) Click West 424
dictGet test_01037.dict_array (0,-1.1) Click West 424
dictGet test_01037.dict_array (0,1.1) Click North 422
dictGet test_01037.dict_array (0,2) Click North 422
dictGet test_01037.dict_array (0.1,0) Click East 421
dictGet test_01037.dict_array (0.99,2.99) Click North 422
dictGet test_01037.dict_array (1,0) Click East 421
dictGet test_01037.dict_array (2,4) House 523
dictGet test_01037.dict_array (2,4.1) qqq 101
dictGet test_01037.dict_array (3,3) House 523
dictGet test_01037.dict_array (4,4) House 523
dictGet test_01037.dict_array (5,6) qqq 101
dictGet test_01037.dict_array (7.01,7.01) qqq 101
dictGetOrDefault test_01037.dict_array (-100,-42) www 1234
dictGetOrDefault test_01037.dict_array (-1,0) Click South 423
dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423
dictGetOrDefault test_01037.dict_array (0,-2) Click West 424
dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424
dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422
dictGetOrDefault test_01037.dict_array (0,2) Click North 422
dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421
dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422
dictGetOrDefault test_01037.dict_array (1,0) Click East 421
dictGetOrDefault test_01037.dict_array (2,4) House 523
dictGetOrDefault test_01037.dict_array (2,4.1) www 1234
dictGetOrDefault test_01037.dict_array (3,3) House 523
dictGetOrDefault test_01037.dict_array (4,4) House 523
dictGetOrDefault test_01037.dict_array (5,6) www 1234
dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234
dictGetOrDefault test_01037.dict_array (-100,-42) dd 44
dictGetOrDefault test_01037.dict_array (-1,0) Click South 423
dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423
dictGetOrDefault test_01037.dict_array (0,-2) Click West 424
dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424
dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422
dictGetOrDefault test_01037.dict_array (0,2) Click North 422
dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421
dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422
dictGetOrDefault test_01037.dict_array (1,0) Click East 421
dictGetOrDefault test_01037.dict_array (2,4) House 523
dictGetOrDefault test_01037.dict_array (2,4.1) gac 803
dictGetOrDefault test_01037.dict_array (3,3) House 523
dictGetOrDefault test_01037.dict_array (4,4) House 523
dictGetOrDefault test_01037.dict_array (5,6) cc 33
dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55
dictGet test_01037.dict_tuple (-100,-42) qqq 101
dictGet test_01037.dict_tuple (-1,0) Click South 423
dictGet test_01037.dict_tuple (-0.1,0) Click South 423
dictGet test_01037.dict_tuple (0,-2) Click West 424
dictGet test_01037.dict_tuple (0,-1.1) Click West 424
dictGet test_01037.dict_tuple (0,1.1) Click North 422
dictGet test_01037.dict_tuple (0,2) Click North 422
dictGet test_01037.dict_tuple (0.1,0) Click East 421
dictGet test_01037.dict_tuple (0.99,2.99) Click North 422
dictGet test_01037.dict_tuple (1,0) Click East 421
dictGet test_01037.dict_tuple (2,4) House 523
dictGet test_01037.dict_tuple (2,4.1) qqq 101
dictGet test_01037.dict_tuple (3,3) House 523
dictGet test_01037.dict_tuple (4,4) House 523
dictGet test_01037.dict_tuple (5,6) qqq 101
dictGet test_01037.dict_tuple (7.01,7.01) qqq 101
dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234
dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423
dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423
dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424
dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424
dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422
dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422
dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421
dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422
dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421
dictGetOrDefault test_01037.dict_tuple (2,4) House 523
dictGetOrDefault test_01037.dict_tuple (2,4.1) www 1234
dictGetOrDefault test_01037.dict_tuple (3,3) House 523
dictGetOrDefault test_01037.dict_tuple (4,4) House 523
dictGetOrDefault test_01037.dict_tuple (5,6) www 1234
dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234
dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44
dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423
dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423
dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424
dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424
dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422
dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422
dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421
dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422
dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421
dictGetOrDefault test_01037.dict_tuple (2,4) House 523
dictGetOrDefault test_01037.dict_tuple (2,4.1) gac 803
dictGetOrDefault test_01037.dict_tuple (3,3) House 523
dictGetOrDefault test_01037.dict_tuple (4,4) House 523
dictGetOrDefault test_01037.dict_tuple (5,6) cc 33
dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55
dictHas test_01037.dict_array (-100,-42) 0
dictHas test_01037.dict_array (-1,0) 1
dictHas test_01037.dict_array (-0.1,0) 1
dictHas test_01037.dict_array (0,-2) 1
dictHas test_01037.dict_array (0,-1.1) 1
dictHas test_01037.dict_array (0,-1) 1
dictHas test_01037.dict_array (0,0) 1
dictHas test_01037.dict_array (0,1) 1
dictHas test_01037.dict_array (0,1.1) 1
dictHas test_01037.dict_array (0,2) 1
dictHas test_01037.dict_array (0.1,0) 1
dictHas test_01037.dict_array (0.99,2.99) 1
dictHas test_01037.dict_array (1,0) 1
dictHas test_01037.dict_array (1,1) 1
dictHas test_01037.dict_array (1,3) 1
dictHas test_01037.dict_array (2,4) 1
dictHas test_01037.dict_array (2,4.1) 0
dictHas test_01037.dict_array (3,3) 1
dictHas test_01037.dict_array (4,4) 1
dictHas test_01037.dict_array (5,1) 1
dictHas test_01037.dict_array (5,5) 1
dictHas test_01037.dict_array (5,6) 0
dictHas test_01037.dict_array (7.01,7.01) 0
dictHas test_01037.dict_tuple (-100,-42) 0
dictHas test_01037.dict_tuple (-1,0) 1
dictHas test_01037.dict_tuple (-0.1,0) 1
dictHas test_01037.dict_tuple (0,-2) 1
dictHas test_01037.dict_tuple (0,-1.1) 1
dictHas test_01037.dict_tuple (0,-1) 1
dictHas test_01037.dict_tuple (0,0) 1
dictHas test_01037.dict_tuple (0,1) 1
dictHas test_01037.dict_tuple (0,1.1) 1
dictHas test_01037.dict_tuple (0,2) 1
dictHas test_01037.dict_tuple (0.1,0) 1
dictHas test_01037.dict_tuple (0.99,2.99) 1
dictHas test_01037.dict_tuple (1,0) 1
dictHas test_01037.dict_tuple (1,1) 1
dictHas test_01037.dict_tuple (1,3) 1
dictHas test_01037.dict_tuple (2,4) 1
dictHas test_01037.dict_tuple (2,4.1) 0
dictHas test_01037.dict_tuple (3,3) 1
dictHas test_01037.dict_tuple (4,4) 1
dictHas test_01037.dict_tuple (5,1) 1
dictHas test_01037.dict_tuple (5,5) 1
dictHas test_01037.dict_tuple (5,6) 0
dictHas test_01037.dict_tuple (7.01,7.01) 0

View File

@ -1,108 +0,0 @@
-- Keep server log messages out of the test output.
SET send_logs_level = 'fatal';
-- Start from a clean database.
DROP DATABASE IF EXISTS test_01037;
CREATE DATABASE test_01037 Engine = Ordinary;
-- Source table and dictionary with polygons whose points are given as arrays.
DROP DICTIONARY IF EXISTS test_01037.dict_array;
DROP TABLE IF EXISTS test_01037.polygons_array;
CREATE TABLE test_01037.polygons_array (key Array(Array(Float64)), name String, value UInt64) ENGINE = Memory;
-- Four rectangles around the origin and one five-point polygon ('House').
INSERT INTO test_01037.polygons_array VALUES ([[3, 1], [0, 1], [0, -1], [3, -1]], 'Click East', 421);
INSERT INTO test_01037.polygons_array VALUES ([[-1, 1], [1, 1], [1, 3], [-1, 3]], 'Click North', 422);
INSERT INTO test_01037.polygons_array VALUES ([[-3, 1], [-3, -1], [0, -1], [0, 1]], 'Click South', 423);
INSERT INTO test_01037.polygons_array VALUES ([[-1, -1], [1, -1], [1, -3], [-1, -3]], 'Click West', 424);
INSERT INTO test_01037.polygons_array VALUES ([[1, 1], [1, 3], [3, 5], [5, 5], [5, 1]], 'House', 523);
-- The attribute defaults ('qqq', 101) are what dictGet returns for points outside every polygon.
CREATE DICTIONARY test_01037.dict_array
(
key Array(Array(Float64)),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(POLYGON());
-- The same polygons, with points given as tuples instead of arrays.
DROP DICTIONARY IF EXISTS test_01037.dict_tuple;
DROP TABLE IF EXISTS test_01037.polygons_tuple;
CREATE TABLE test_01037.polygons_tuple (key Array(Tuple(Float64, Float64)), name String, value UInt64) ENGINE = Memory;
INSERT INTO test_01037.polygons_tuple VALUES ([(3.0, 1.0), (0.0, 1.0), (0.0, -1.0), (3.0, -1.0)], 'Click East', 421);
INSERT INTO test_01037.polygons_tuple VALUES ([(-1, 1), (1, 1), (1, 3), (-1, 3)], 'Click North', 422);
INSERT INTO test_01037.polygons_tuple VALUES ([(-3, 1), (-3, -1), (0, -1), (0, 1)], 'Click South', 423);
INSERT INTO test_01037.polygons_tuple VALUES ([(-1, -1), (1, -1), (1, -3), (-1, -3)], 'Click West', 424);
INSERT INTO test_01037.polygons_tuple VALUES ([(1, 1), (1, 3), (3, 5), (5, 5), (5, 1)], 'House', 523);
CREATE DICTIONARY test_01037.dict_tuple
(
key Array(Tuple(Float64, Float64)),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(POLYGON());
-- Query points: coordinates plus per-row defaults (def_i, def_s) used by dictGetOrDefault.
DROP TABLE IF EXISTS test_01037.points;
CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory;
INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax');
INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay');
INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz');
INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat');
INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb');
INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc');
INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd');
-- Every statement is terminated explicitly instead of relying on the line break ending the inline data.
INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee');
INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee');
INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa');
INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb');
INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc');
INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd');
INSERT INTO test_01037.points VALUES (2.0, 4.0, 801, 'gaa');
INSERT INTO test_01037.points VALUES (4.0, 4.0, 802, 'gab');
INSERT INTO test_01037.points VALUES (2.0, 4.1, 803, 'gac');
-- Array-keyed dictionary: plain dictGet (falls back to the attribute defaults).
select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
-- dictGetOrDefault with constant defaults.
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
-- dictGetOrDefault with per-row defaults taken from the points table.
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
-- The same three queries against the tuple-keyed dictionary.
select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
-- Extra points coinciding with polygon vertices and edges; they are only used by the dictHas checks below.
INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, '');
INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, '');
INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, 0.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, '');
INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, '');
select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictHas(dict_name, key) from test_01037.points order by x, y;
select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictHas(dict_name, key) from test_01037.points order by x, y;
-- Cleanup.
DROP DICTIONARY test_01037.dict_array;
DROP DICTIONARY test_01037.dict_tuple;
DROP TABLE test_01037.polygons_array;
DROP TABLE test_01037.polygons_tuple;
DROP TABLE test_01037.points;
DROP DATABASE test_01037;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
#!/usr/bin/env bash

# Correctness test for every polygon dictionary layout: the same points are looked up through
# each layout and the result is compared with a single expected answer file.
# All path expansions are quoted so that a checkout path containing spaces or glob characters works.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR/../shell_config.sh"

TMP_DIR="/tmp"

declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL")

# Unpack the test data next to the script; presumably this yields the 01037_point_data and
# 01037_polygon_data files that are loaded (and removed) below.
tar -xf "${CURDIR}/01037_test_data_search.tar.gz" -C "${CURDIR}"

# $CLICKHOUSE_CLIENT is intentionally left unquoted: it comes from shell_config.sh and
# presumably may carry extra arguments besides the binary name.
$CLICKHOUSE_CLIENT -n --query="
DROP DATABASE IF EXISTS test_01037;
CREATE DATABASE test_01037 Engine = Ordinary;
DROP TABLE IF EXISTS test_01037.points;
CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory;
"

$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data"
rm "${CURDIR}/01037_point_data"

$CLICKHOUSE_CLIENT -n --query="
DROP TABLE IF EXISTS test_01037.polygons_array;
CREATE TABLE test_01037.polygons_array
(
key Array(Array(Array(Array(Float64)))),
name String,
value UInt64
)
ENGINE = Memory;
"

$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data"
rm "${CURDIR}/01037_polygon_data"

# Recreate the dictionary with each layout and compare the lookups with the expected answers.
# diff -q prints only when the files differ, so a passing run produces no output here.
for type in "${SearchTypes[@]}";
do
outputFile="${TMP_DIR}/results${type}.out"
$CLICKHOUSE_CLIENT -n --query="
DROP DICTIONARY IF EXISTS test_01037.dict_array;
CREATE DICTIONARY test_01037.dict_array
(
key Array(Array(Array(Array(Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037'))
LIFETIME(0)
LAYOUT($type());
select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
" > "$outputFile"
diff -q "${CURDIR}/01037_polygon_dicts_correctness_all.ans" "$outputFile"
done

$CLICKHOUSE_CLIENT -n --query="
DROP TABLE test_01037.points;
DROP DATABASE test_01037;
"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
#!/usr/bin/env bash

# Correctness test for the indexed polygon dictionary layouts on the larger ("perf") data set:
# the same points are looked up through each layout and compared with one expected answer file.
# All path expansions are quoted so that a checkout path containing spaces or glob characters works.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR/../shell_config.sh"

TMP_DIR="/tmp"

declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL")

# Unpack the test data next to the script; presumably this yields the 01037_point_data and
# 01037_polygon_data files that are loaded (and removed) below.
tar -xf "${CURDIR}/01037_test_data_perf.tar.gz" -C "${CURDIR}"

# $CLICKHOUSE_CLIENT is intentionally left unquoted: it comes from shell_config.sh and
# presumably may carry extra arguments besides the binary name.
$CLICKHOUSE_CLIENT -n --query="
DROP DATABASE IF EXISTS test_01037;
CREATE DATABASE test_01037 Engine = Ordinary;
DROP TABLE IF EXISTS test_01037.points;
CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory;
"

$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data"
rm "${CURDIR}/01037_point_data"

$CLICKHOUSE_CLIENT -n --query="
DROP TABLE IF EXISTS test_01037.polygons_array;
CREATE TABLE test_01037.polygons_array
(
key Array(Array(Array(Array(Float64)))),
name String,
value UInt64
)
ENGINE = Memory;
"

$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data"
rm "${CURDIR}/01037_polygon_data"

# Recreate the dictionary with each layout and compare the lookups with the expected answers.
# diff -q prints only when the files differ, so a passing run produces no output here.
for type in "${SearchTypes[@]}";
do
outputFile="${TMP_DIR}/results${type}.out"
$CLICKHOUSE_CLIENT -n --query="
DROP DICTIONARY IF EXISTS test_01037.dict_array;
CREATE DICTIONARY test_01037.dict_array
(
key Array(Array(Array(Array(Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037'))
LIFETIME(0)
LAYOUT($type());
select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
" > "$outputFile"
diff -q "${CURDIR}/01037_polygon_dicts_correctness_fast.ans" "$outputFile"
done

$CLICKHOUSE_CLIENT -n --query="
DROP TABLE test_01037.points;
DROP DATABASE test_01037;
"

View File

@ -81,19 +81,12 @@ dictHas test_01037.dict_array (-1,0) 1
dictHas test_01037.dict_array (-0.1,0) 1
dictHas test_01037.dict_array (0,-2) 1
dictHas test_01037.dict_array (0,-1.1) 1
dictHas test_01037.dict_array (0,-1) 1
dictHas test_01037.dict_array (0,0) 1
dictHas test_01037.dict_array (0,1) 1
dictHas test_01037.dict_array (0,1.1) 1
dictHas test_01037.dict_array (0,2) 1
dictHas test_01037.dict_array (0.1,0) 1
dictHas test_01037.dict_array (0.99,2.99) 1
dictHas test_01037.dict_array (1,0) 1
dictHas test_01037.dict_array (1,1) 1
dictHas test_01037.dict_array (1,3) 1
dictHas test_01037.dict_array (3,3) 1
dictHas test_01037.dict_array (5,1) 1
dictHas test_01037.dict_array (5,5) 1
dictHas test_01037.dict_array (5,6) 1
dictHas test_01037.dict_array (7.01,7.01) 0
dictHas test_01037.dict_tuple (-100,-42) 0
@ -101,18 +94,11 @@ dictHas test_01037.dict_tuple (-1,0) 1
dictHas test_01037.dict_tuple (-0.1,0) 1
dictHas test_01037.dict_tuple (0,-2) 1
dictHas test_01037.dict_tuple (0,-1.1) 1
dictHas test_01037.dict_tuple (0,-1) 1
dictHas test_01037.dict_tuple (0,0) 1
dictHas test_01037.dict_tuple (0,1) 1
dictHas test_01037.dict_tuple (0,1.1) 1
dictHas test_01037.dict_tuple (0,2) 1
dictHas test_01037.dict_tuple (0.1,0) 1
dictHas test_01037.dict_tuple (0.99,2.99) 1
dictHas test_01037.dict_tuple (1,0) 1
dictHas test_01037.dict_tuple (1,1) 1
dictHas test_01037.dict_tuple (1,3) 1
dictHas test_01037.dict_tuple (3,3) 1
dictHas test_01037.dict_tuple (5,1) 1
dictHas test_01037.dict_tuple (5,5) 1
dictHas test_01037.dict_tuple (5,6) 1
dictHas test_01037.dict_tuple (7.01,7.01) 0

View File

@ -1,10 +1,15 @@
SET send_logs_level = 'fatal';
#!/usr/bin/env bash
# Absolute path of the directory containing this script.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Source the shared shell configuration from the parent directory; the path is
# quoted so a checkout directory containing spaces does not break it.
. "$CURDIR/../shell_config.sh"
# Directory that receives the per-layout result files (results<LAYOUT>.out).
TMP_DIR="/tmp"
$CLICKHOUSE_CLIENT -n --query="
DROP DATABASE IF EXISTS test_01037;
CREATE DATABASE test_01037 Engine = Ordinary;
DROP DICTIONARY IF EXISTS test_01037.dict_array;
DROP TABLE IF EXISTS test_01037.polygons_array;
CREATE TABLE test_01037.polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory;
@ -15,18 +20,6 @@ INSERT INTO test_01037.polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3
INSERT INTO test_01037.polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423);
INSERT INTO test_01037.polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424);
CREATE DICTIONARY test_01037.dict_array
(
key Array(Array(Array(Array(Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(POLYGON());
DROP DICTIONARY IF EXISTS test_01037.dict_tuple;
DROP TABLE IF EXISTS test_01037.polygons_tuple;
CREATE TABLE test_01037.polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory;
@ -37,17 +30,6 @@ INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3
INSERT INTO test_01037.polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423);
INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424);
CREATE DICTIONARY test_01037.dict_tuple
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(POLYGON());
DROP TABLE IF EXISTS test_01037.points;
CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory;
@ -58,50 +40,79 @@ INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat');
INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb');
INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc');
INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd');
INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee')
INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee');
INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee');
INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa');
INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb');
INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc');
INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd');
"
select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
# Dictionary layouts to exercise; each name is substituted into LAYOUT($type())
# when the dictionaries are created in the loop below.
declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL")
INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, '');
INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, '');
INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, 0.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, '');
INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, '');
INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, '');
# For every layout named in SearchTypes: recreate dict_array and dict_tuple with
# that layout, run the dictGet / dictGetOrDefault / dictHas queries over
# test_01037.points, write the output to a per-layout file under TMP_DIR and
# compare it with the reference answers.
#
# The dictHas selects are issued once, after both dictionaries exist. Copies of
# them that sat outside the quoted query (where the shell would try to run them)
# and ahead of the dict_tuple definition have been removed.
# The array expansion and the redirect target are quoted to avoid word splitting.
for type in "${SearchTypes[@]}";
do
outputFile="${TMP_DIR}/results${type}.out"
$CLICKHOUSE_CLIENT -n --query="
DROP DICTIONARY IF EXISTS test_01037.dict_array;
CREATE DICTIONARY test_01037.dict_array
(
key Array(Array(Array(Array(Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037'))
LIFETIME(0)
LAYOUT($type());
DROP DICTIONARY IF EXISTS test_01037.dict_tuple;
CREATE DICTIONARY test_01037.dict_tuple
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String DEFAULT 'qqq',
value UInt64 DEFAULT 101
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037'))
LIFETIME(0)
LAYOUT($type());
select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGet(dict_name, 'name', key),
dictGet(dict_name, 'value', key) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, 'www'),
dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y;
select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictGetOrDefault(dict_name, 'name', key, def_s),
dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y;
select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key,
dictHas(dict_name, key) from test_01037.points order by x, y;
select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key,
dictHas(dict_name, key) from test_01037.points order by x, y;
" > "$outputFile"
# -q: only report whether the output differs from the reference file.
diff -q "${CURDIR}/01037_polygon_dicts_simple_functions.ans" "$outputFile"
done
# Final cleanup: drop both dictionaries, their source tables, the points table
# and finally the test database itself.
$CLICKHOUSE_CLIENT -n --query="
DROP DICTIONARY test_01037.dict_array;
DROP DICTIONARY test_01037.dict_tuple;
DROP TABLE test_01037.polygons_array;
DROP TABLE test_01037.polygons_tuple;
DROP TABLE test_01037.points;
DROP DATABASE test_01037;
"

Binary file not shown.