From 66341cc40fa3e8edf8ea6e5d66cdf169ba850a6c Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 2 Dec 2019 18:26:59 +0300 Subject: [PATCH 01/76] Temporary: will be renamed later --- dbms/src/Dictionaries/DictionaryStructure.cpp | 10 +- dbms/src/Dictionaries/DictionaryStructure.h | 3 +- dbms/src/Dictionaries/PolygonDictionary.cpp | 252 ++++++++++++++++++ dbms/src/Dictionaries/PolygonDictionary.h | 101 +++++++ .../src/Dictionaries/registerDictionaries.cpp | 2 + .../Functions/FunctionsExternalDictionaries.h | 4 +- 6 files changed, 369 insertions(+), 3 deletions(-) create mode 100644 dbms/src/Dictionaries/PolygonDictionary.cpp create mode 100644 dbms/src/Dictionaries/PolygonDictionary.h diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index 925e9e01a82..9da9427976d 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -74,6 +74,9 @@ AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type) return AttributeUnderlyingType::utDecimal128; } + if (type.find("Array") == 0) + return AttributeUnderlyingType::utString; + throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE}; } @@ -112,6 +115,8 @@ std::string toString(const AttributeUnderlyingType type) return "Decimal128"; case AttributeUnderlyingType::utString: return "String"; + //case AttributeUnderlyingType::utArray: + // return "Array"; } throw Exception{"Unknown attribute_type " + toString(static_cast(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; @@ -243,9 +248,12 @@ bool DictionaryStructure::isKeySizeFixed() const return true; for (const auto & key_i : *key) + { if (key_i.underlying_type == AttributeUnderlyingType::utString) return false; - + //if (key_i.underlying_type == AttributeUnderlyingType::utArray) + // return false; + } return true; } diff --git a/dbms/src/Dictionaries/DictionaryStructure.h b/dbms/src/Dictionaries/DictionaryStructure.h index 2893dea2e4f..d40360041ab 
100644 --- a/dbms/src/Dictionaries/DictionaryStructure.h +++ b/dbms/src/Dictionaries/DictionaryStructure.h @@ -36,7 +36,8 @@ enum class AttributeUnderlyingType utDecimal32, utDecimal64, utDecimal128, - utString + utString, + //utArray }; diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp new file mode 100644 index 00000000000..2dfdcaebdfe --- /dev/null +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -0,0 +1,252 @@ +#include +#include "PolygonDictionary.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TYPE; + extern const int UNSUPPORTED_METHOD; +} + + +IPolygonDictionary::IPolygonDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_) + : name(name_) + , dict_struct(dict_struct_) + , source_ptr(std::move(source_ptr_)) + , dict_lifetime(dict_lifetime_) +{ +} + +std::string IPolygonDictionary::getName() const +{ + return name; +} + +std::string IPolygonDictionary::getTypeName() const +{ + return "Polygon"; +} + +size_t IPolygonDictionary::getBytesAllocated() const +{ + return bytes_allocated; +} + +size_t IPolygonDictionary::getQueryCount() const +{ + return query_count.load(std::memory_order_relaxed); +} + +double IPolygonDictionary::getHitRate() const +{ + return 1.0; +} + +size_t IPolygonDictionary::getElementCount() const +{ + return element_count; +} + +double IPolygonDictionary::getLoadFactor() const +{ + return 1.0; +} + +const IDictionarySource * IPolygonDictionary::getSource() const +{ + return source_ptr.get(); +} + +const DictionaryLifetime & IPolygonDictionary::getLifetime() const +{ + return dict_lifetime; +} + +const DictionaryStructure & 
IPolygonDictionary::getStructure() const +{ + return dict_struct; +} + +bool IPolygonDictionary::isInjective(const std::string &) const +{ + return false; +} + +BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const { + // TODO: Better error message. + throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; +} + +std::shared_ptr IPolygonDictionary::clone() const +{ + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime) +} + +void IPolygonDictionary::createAttributes() { + for (size_t i = 0; i < dict_struct.attributes.size(); ++i) + { + attribute_index_by_name.emplace(dict_struct.attributes[i].name, i); + + if (dict_struct.attributes[i].name.hierarchical) + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; + } +} + +void IPolygonDictionary::blockToAttributes(const DB::Block &block) { + const auto rows = block.rows(); + element_count += rows; + // TODO: Only save columns for attributes, since we are converting the key to a boost type separately. + blocks.push_back(block); + polygons.reserve(polygons.size() + rows); + + const auto & key = block.safeGetByPosition(0).column; + + for (const auto row : ext::range(0, rows)) + { + const auto & field = (*key)[row]; + polygons.push_back(fieldToMultiPolygon(field)); + } +} + +void IPolygonDictionary::loadData() { + auto stream = source_ptr->loadAll(); + stream->readPrefix(); + while (const auto block = stream->read()) { + blockToAttributes(block); + } + stream->readSuffix(); +} + +void IPolygonDictionary::calculateBytesAllocated() +{ + for (const auto & block : blocks) + bytes_allocated += block.allocatedBytes(); + +} + +void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &key_types, PaddedPODArray &out) { + // TODO: Use constant in error message? 
+ if (key_types.size() != DIM) + throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; + for (const auto i : ext::range(0, DIM)) + { + // TODO: Not sure if this is the best way to check. + if (key_types[i]->getName() != "Array(Float64)") + throw Exception{"Expected an array of Float64", ErrorCodes::TYPE_MISMATCH}; + } + const auto rows = key_columns.front()->size(); + for (const auto row : ext::range(0, rows)) + { + Point pt(key_columns[0]->getFloat64(row), key_columns[1]->getFloat64(row)); + // TODO: Check whether this will be optimized by the compiler. + size_t trash; + out[row] = find(pt, trash); + } +} + +IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) +{ + if (field.getType() == Field::Types::Array) + { + auto coordinate_array = field.get(); + if (coordinate_array.size() != DIM) + throw Exception{"All points should be two-dimensional", ErrorCodes::LOGICAL_ERROR}; + Float64 values[DIM]; + for (size_t i = 0; i < DIM; ++i) + { + if (coordinate_array[i].getType() != Field::Types::Float64) + throw Exception{"Coordinates should be Float64", ErrorCodes::TYPE_MISMATCH}; + values[i] = coordinate_array[i].get(); + } + return {values[0], values[1]}; + } + else + throw Exception{"Point is not represented by an array", ErrorCodes::TYPE_MISMATCH}; +} + +IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & field) { + Polygon result; + if (field.getType() == Field::Types::Array) + { + const auto & ring_array = field.get(); + if (ring_array.empty()) + throw Exception{"Empty polygons are not allowed", ErrorCodes::LOGICAL_ERROR}; + result.inners().resize(ring_array.size() - 1); + if (ring_array[0].getType() != Field::Types::Array) + throw Exception{"Outer polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; + for (const auto & point : ring_array[0].get()) + bg::append(result.outer(), fieldToPoint(point)); + for (size_t i = 0; i < result.inners().size(); ++i) { + if 
(ring_array[i + 1].getType() != Field::Types::Array) + throw Exception{"Inner polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; + for (const auto & point : ring_array[i + 1].get()) + bg::append(result.inners()[i], fieldToPoint(point)); + } + } + else + throw Exception{"Polygon is not represented by an array", ErrorCodes::TYPE_MISMATCH}; + return result; +} + +IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const Field &field) { + MultiPolygon result; + if (field.getType() == Field::Types::Array) + { + const auto& polygon_array = field.get(); + result.reserve(polygon_array.size()); + for (const auto & polygon : polygon_array) + result.push_back(fieldToPolygon(polygon)); + } + else + throw Exception{"MultiPolygon is not represented by an array", ErrorCodes::TYPE_MISMATCH}; + return result; +} + +void SimplePolygonDictionary::generate() {} + +bool SimplePolygonDictionary::find(const Point &point, size_t & id) const +{ + for (size_t i = 0; i < (this->polygons).size(); ++i) + { + if (bg::within(point, (this->polygons)[i])) { + id = i; + return true; + } + } + return false; +} + +void registerDictionaryPolygon(DictionaryFactory & factory) +{ + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr + { + // TODO: Check that there is only one key and it is of the correct type. 
+ if (dict_struct.range_min || dict_struct.range_max) + throw Exception{name + + ": elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + }; + factory.registerLayout("polygon", create_layout, true); +} + +} \ No newline at end of file diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h new file mode 100644 index 00000000000..9aa4d7971be --- /dev/null +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" + +namespace DB +{ + +namespace bg = boost::geometry; + +class IPolygonDictionary : public IDictionaryBase +{ +public: + IPolygonDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + DictionaryLifetime dict_lifetime_); + + std::string getName() const override; + + std::string getTypeName() const override; + + size_t getBytesAllocated() const override; + + size_t getQueryCount() const override; + + double getHitRate() const override; + + size_t getElementCount() const override; + + double getLoadFactor() const override; + + const IDictionarySource * getSource() const override; + + const DictionaryStructure & getStructure() const override; + + const DictionaryLifetime & getLifetime() const override; + + bool isInjective(const std::string & attribute_name) const override; + + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + + std::shared_ptr clone() const override; + + // TODO: Refactor design to perform stronger checks, i.e. make this an override. 
+ void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) ; + +protected: + using Point = bg::model::point; + using Polygon = bg::model::polygon; + using MultiPolygon = bg::model::multi_polygon; + + std::vector polygons; + +private: + virtual void generate() = 0; + virtual bool find(const Point & point, size_t & id) const = 0; + + void createAttributes(); + void blockToAttributes(const Block & block); + void loadData(); + + void calculateBytesAllocated(); + + const std::string name; + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; + + std::map attribute_index_by_name; + std::vector blocks; + + size_t bytes_allocated = 0; + size_t element_count = 0; + mutable std::atomic query_count{0}; + + + static Point fieldToPoint(const Field & field); + static Polygon fieldToPolygon(const Field & field); + static MultiPolygon fieldToMultiPolygon(const Field & field); + + static constexpr size_t DIM = 2; +}; + +class SimplePolygonDictionary : public IPolygonDictionary +{ +private: + void generate() override; + bool find(const Point & point, size_t & id) const override; +}; + +} \ No newline at end of file diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp index ee320d7177b..03dd00a1046 100644 --- a/dbms/src/Dictionaries/registerDictionaries.cpp +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -21,6 +21,7 @@ void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); +void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaries() @@ -48,6 +49,7 @@ void registerDictionaries() registerDictionaryFlat(factory); registerDictionaryHashed(factory); registerDictionaryCache(factory); + registerDictionaryPolygon(factory); } 
} diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index a47dacf5deb..2e93e5b1397 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -138,7 +139,8 @@ private: !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && - !executeDispatchComplex(block, arguments, result, dict_ptr)) + !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr)) throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } From 331e45a970a293a8946930bf8ae231a834f917da Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 16 Dec 2019 18:11:16 +0300 Subject: [PATCH 02/76] Fixes --- dbms/src/Dictionaries/PolygonDictionary.cpp | 11 ++++++++++- dbms/src/Dictionaries/PolygonDictionary.h | 8 +++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 2dfdcaebdfe..bf6bd1848b7 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -99,7 +99,7 @@ void IPolygonDictionary::createAttributes() { { attribute_index_by_name.emplace(dict_struct.attributes[i].name, i); - if (dict_struct.attributes[i].name.hierarchical) + if (dict_struct.attributes[i].hierarchical) throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; } @@ -215,6 +215,15 @@ IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const F return result; } +SimplePolygonDictionary::SimplePolygonDictionary( + const std::string & name_, + const 
DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_) + : IPolygonDictionary(name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) +{ +} + void SimplePolygonDictionary::generate() {} bool SimplePolygonDictionary::find(const Point &point, size_t & id) const diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 9aa4d7971be..4aa1bb325d2 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -61,10 +61,10 @@ protected: std::vector polygons; -private: virtual void generate() = 0; virtual bool find(const Point & point, size_t & id) const = 0; +private: void createAttributes(); void blockToAttributes(const Block & block); void loadData(); @@ -93,6 +93,12 @@ private: class SimplePolygonDictionary : public IPolygonDictionary { +public: + SimplePolygonDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + DictionaryLifetime dict_lifetime_); private: void generate() override; bool find(const Point & point, size_t & id) const override; From 3f7fba6a783b67315f7fa7918419309fa3aa23ec Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 16 Dec 2019 18:24:26 +0300 Subject: [PATCH 03/76] Fix compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 14 +++++++++----- dbms/src/Dictionaries/PolygonDictionary.h | 15 ++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index bf6bd1848b7..63d609343bc 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -89,11 +89,6 @@ BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_ throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; } -std::shared_ptr 
IPolygonDictionary::clone() const -{ - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime) -} - void IPolygonDictionary::createAttributes() { for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { @@ -224,6 +219,15 @@ SimplePolygonDictionary::SimplePolygonDictionary( { } +std::shared_ptr SimplePolygonDictionary::clone() const +{ + return std::make_shared( + this->name, + this->dict_struct, + this->source_ptr->clone(), + this->dict_lifetime); +} + void SimplePolygonDictionary::generate() {} bool SimplePolygonDictionary::find(const Point &point, size_t & id) const diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 4aa1bb325d2..ed8dfff4077 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -49,8 +49,6 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; - std::shared_ptr clone() const override; - // TODO: Refactor design to perform stronger checks, i.e. make this an override. 
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) ; @@ -64,6 +62,11 @@ protected: virtual void generate() = 0; virtual bool find(const Point & point, size_t & id) const = 0; + const std::string name; + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; + private: void createAttributes(); void blockToAttributes(const Block & block); @@ -71,11 +74,6 @@ private: void calculateBytesAllocated(); - const std::string name; - const DictionaryStructure dict_struct; - const DictionarySourcePtr source_ptr; - const DictionaryLifetime dict_lifetime; - std::map attribute_index_by_name; std::vector blocks; @@ -99,6 +97,9 @@ public: const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, DictionaryLifetime dict_lifetime_); + + std::shared_ptr clone() const override; + private: void generate() override; bool find(const Point & point, size_t & id) const override; From e9d2a1a8addb8f77463cb80df827480147a3a00d Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 16 Dec 2019 18:34:46 +0300 Subject: [PATCH 04/76] More compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- dbms/src/Dictionaries/PolygonDictionary.h | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 63d609343bc..9cf7878dfeb 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -27,6 +27,8 @@ IPolygonDictionary::IPolygonDictionary( , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) { + createAttributes(); + loadData(); } std::string IPolygonDictionary::getName() const @@ -132,7 +134,7 @@ void IPolygonDictionary::calculateBytesAllocated() } -void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &key_types, PaddedPODArray &out) { +void IPolygonDictionary::has(const Columns 
&key_columns, const DataTypes &key_types, PaddedPODArray &out) const { // TODO: Use constant in error message? if (key_types.size() != DIM) throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; @@ -228,8 +230,6 @@ std::shared_ptr SimplePolygonDictionary::clone() const this->dict_lifetime); } -void SimplePolygonDictionary::generate() {} - bool SimplePolygonDictionary::find(const Point &point, size_t & id) const { for (size_t i = 0; i < (this->polygons).size(); ++i) diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index ed8dfff4077..1c5ae8b519a 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -50,7 +50,7 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; // TODO: Refactor design to perform stronger checks, i.e. make this an override. - void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) ; + void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; protected: using Point = bg::model::point; @@ -59,7 +59,6 @@ protected: std::vector polygons; - virtual void generate() = 0; virtual bool find(const Point & point, size_t & id) const = 0; const std::string name; @@ -101,7 +100,6 @@ public: std::shared_ptr clone() const override; private: - void generate() override; bool find(const Point & point, size_t & id) const override; }; From 76ff03973c53f0a972d4257b2826d272a1eca31d Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 16 Dec 2019 18:46:51 +0300 Subject: [PATCH 05/76] More compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 5 +++++ dbms/src/Dictionaries/PolygonDictionary.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 9cf7878dfeb..a28feaff363 100644 --- 
a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -41,6 +41,11 @@ std::string IPolygonDictionary::getTypeName() const return "Polygon"; } +std::string IPolygonDictionary::getKeyDescription() const +{ + return dict_struct.getKeyDescription(); +} + size_t IPolygonDictionary::getBytesAllocated() const { return bytes_allocated; diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 1c5ae8b519a..9a4cb2e0840 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -29,6 +29,8 @@ public: std::string getTypeName() const override; + std::string getKeyDescription() const; + size_t getBytesAllocated() const override; size_t getQueryCount() const override; From a8fec15b50aacf02eca89edac6085236314a4a64 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:23:11 +0300 Subject: [PATCH 06/76] First implementation of dictGet --- .../Dictionaries/ComplexKeyHashedDictionary.h | 1 + dbms/src/Dictionaries/PolygonDictionary.cpp | 230 ++++++++++++++++-- dbms/src/Dictionaries/PolygonDictionary.h | 100 +++++++- 3 files changed, 309 insertions(+), 22 deletions(-) diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index 77941d6c5df..19d753f3956 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -142,6 +142,7 @@ public: void getString( const std::string & attribute_name, const Columns & key_columns, + const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const; diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index a28feaff363..7532f6238fb 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -97,6 +97,7 @@ BlockInputStreamPtr 
IPolygonDictionary::getBlockInputStream(const Names &, size_ } void IPolygonDictionary::createAttributes() { + attributes.resize(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { attribute_index_by_name.emplace(dict_struct.attributes[i].name, i); @@ -110,8 +111,17 @@ void IPolygonDictionary::createAttributes() { void IPolygonDictionary::blockToAttributes(const DB::Block &block) { const auto rows = block.rows(); element_count += rows; - // TODO: Only save columns for attributes, since we are converting the key to a boost type separately. - blocks.push_back(block); + for (size_t i = 0; i < attributes.size(); ++i) { + const auto & column = block.safeGetByPosition(i + 1); + if (attributes[i]) + { + MutableColumnPtr mutated = std::move(*attributes[i]).mutate(); + mutated->insertRangeFrom(*column.column, 0, column.column->size()); + attributes[i] = std::move(mutated); + } + else + attributes[i] = column.column; + } polygons.reserve(polygons.size() + rows); const auto & key = block.safeGetByPosition(0).column; @@ -119,6 +129,7 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) { for (const auto row : ext::range(0, rows)) { const auto & field = (*key)[row]; + // TODO: Get data more efficiently using polygons.push_back(fieldToMultiPolygon(field)); } } @@ -139,26 +150,207 @@ void IPolygonDictionary::calculateBytesAllocated() } -void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &key_types, PaddedPODArray &out) const { - // TODO: Use constant in error message? - if (key_types.size() != DIM) - throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; - for (const auto i : ext::range(0, DIM)) - { - // TODO: Not sure if this is the best way to check. 
- if (key_types[i]->getName() != "Array(Float64)") - throw Exception{"Expected an array of Float64", ErrorCodes::TYPE_MISMATCH}; - } +std::vector IPolygonDictionary::extractPoints(const Columns &key_columns) +{ + if (key_columns.size() != DIM) + throw Exception{"Expected " + std::to_string(DIM) + " columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; + const auto column_x = typeid_cast*>(key_columns[0].get()); + const auto column_y = typeid_cast*>(key_columns[1].get()); + if (!column_x || !column_y) + throw Exception{"Expected columns of Float64", ErrorCodes::TYPE_MISMATCH}; const auto rows = key_columns.front()->size(); + std::vector result; + result.reserve(rows); for (const auto row : ext::range(0, rows)) + result.emplace_back(column_x->getElement(row), column_y->getElement(row)); + return result; +} + +void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, PaddedPODArray &out) const { + size_t row = 0; + for (const auto & pt : extractPoints(key_columns)) { - Point pt(key_columns[0]->getFloat64(row), key_columns[1]->getFloat64(row)); // TODO: Check whether this will be optimized by the compiler. 
- size_t trash; + size_t trash = 0; out[row] = find(pt, trash); + ++row; } } +size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const { + const auto it = attribute_index_by_name.find(attribute_name); + if (it == attribute_index_by_name.end()) + throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS}; + return it->second; +} + +template +T IPolygonDictionary::getNullValue(const DB::Field &field) const +{ + return field.get>(); +} + +#define DECLARE(TYPE) \ + void IPolygonDictionary::get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ + { \ + const auto ind = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ +\ + const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ +\ + getItemsImpl( \ + ind, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t) { return null_value; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +void IPolygonDictionary::getString( + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const +{ + dict_struct.validateKeyTypes(key_types); + + const auto ind = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + + const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; + + getItemsImpl( + ind, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const 
size_t) { return null_value; }); +} + +#define DECLARE(TYPE) \ + void IPolygonDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const \ + { \ + const auto ind = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ +\ + getItemsImpl( \ + ind, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t row) { return def[row]; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +void IPolygonDictionary::getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const +{ + const auto ind = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + + getItemsImpl( + ind, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t row) { return def->getDataAt(row); }); +} + +#define DECLARE(TYPE) \ + void IPolygonDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const \ + { \ + const auto ind = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ +\ + getItemsImpl( \ + ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; 
}); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +void IPolygonDictionary::getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const +{ + const auto ind = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + + getItemsImpl( + ind, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return StringRef{def}; }); +} + +template +void IPolygonDictionary::getItemsImpl( + size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const +{ + const auto points = extractPoints(key_columns); + + for (const auto i : ext::range(0, points.size())) + { + size_t id = 0; + auto found = find(points[i], id); + set_value(i, found ? 
static_cast((*attributes[attribute_ind])[id].get()) : get_default(i)); + } + + query_count.fetch_add(points.size(), std::memory_order_relaxed); +} + IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { if (field.getType() == Field::Types::Array) @@ -218,11 +410,11 @@ IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const F } SimplePolygonDictionary::SimplePolygonDictionary( - const std::string & name_, - const DictionaryStructure & dict_struct_, - DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_) - : IPolygonDictionary(name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_) + : IPolygonDictionary(name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) { } diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 9a4cb2e0840..5c79b488642 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -51,6 +53,90 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + template + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, const 
DataTypes & key_types, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; + // TODO: Refactor design to perform stronger checks, i.e. make this an override. 
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; @@ -75,14 +161,22 @@ private: void calculateBytesAllocated(); - std::map attribute_index_by_name; - std::vector blocks; + size_t getAttributeIndex(const std::string & attribute_name) const; + template + static T getNullValue(const Field & field) const; + + template + void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; + + + std::map attribute_index_by_name; + Columns attributes; size_t bytes_allocated = 0; size_t element_count = 0; mutable std::atomic query_count{0}; - + static std::vector extractPoints(const Columns &key_columns); static Point fieldToPoint(const Field & field); static Polygon fieldToPolygon(const Field & field); static MultiPolygon fieldToMultiPolygon(const Field & field); From fcfe2fb23420d813774e4410c0624baa55b9f341 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:39:17 +0300 Subject: [PATCH 07/76] Compilation errors --- .../Dictionaries/ComplexKeyHashedDictionary.h | 1 - dbms/src/Dictionaries/PolygonDictionary.cpp | 24 +++++++++---------- dbms/src/Dictionaries/PolygonDictionary.h | 22 ++++++++--------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index 19d753f3956..77941d6c5df 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -142,7 +142,6 @@ public: void getString( const std::string & attribute_name, const Columns & key_columns, - const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const; diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 7532f6238fb..efa68099477 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ 
b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -185,7 +185,7 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) } template -T IPolygonDictionary::getNullValue(const DB::Field &field) const +T IPolygonDictionary::getNullValue(const DB::Field &field) { return field.get>(); } @@ -197,7 +197,7 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) const const auto ind = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ \ - const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ + const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ \ getItemsImpl( \ ind, \ @@ -227,9 +227,9 @@ void IPolygonDictionary::getString( dict_struct.validateKeyTypes(key_types); const auto ind = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; + const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; getItemsImpl( ind, @@ -242,12 +242,12 @@ void IPolygonDictionary::getString( void IPolygonDictionary::get##TYPE( \ const std::string & attribute_name, \ const Columns & key_columns, \ - const DataTypes & key_types, \ + const DataTypes &, \ const PaddedPODArray & def, \ ResultArrayType & out) const \ { \ const auto ind = getAttributeIndex(attribute_name); \ - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ \ getItemsImpl( \ ind, \ @@ -274,12 +274,12 @@ void 
IPolygonDictionary::getString( void IPolygonDictionary::getString( const std::string & attribute_name, const Columns & key_columns, - const DataTypes & key_types, + const DataTypes &, const ColumnString * const def, ColumnString * const out) const { const auto ind = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); getItemsImpl( ind, @@ -292,12 +292,12 @@ void IPolygonDictionary::getString( void IPolygonDictionary::get##TYPE( \ const std::string & attribute_name, \ const Columns & key_columns, \ - const DataTypes & key_types, \ + const DataTypes &, \ const TYPE def, \ ResultArrayType & out) const \ { \ const auto ind = getAttributeIndex(attribute_name); \ - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ \ getItemsImpl( \ ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \ @@ -321,12 +321,12 @@ void IPolygonDictionary::getString( void IPolygonDictionary::getString( const std::string & attribute_name, const Columns & key_columns, - const DataTypes & key_types, + const DataTypes &, const String & def, ColumnString * const out) const { const auto ind = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); getItemsImpl( ind, diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 5c79b488642..1518f7003c4 100644 --- 
a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -58,7 +58,7 @@ public: #define DECLARE(TYPE) \ void get##TYPE( \ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const; + const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -75,13 +75,13 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; + void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const; #define DECLARE(TYPE) \ void get##TYPE( \ const std::string & attribute_name, \ const Columns & key_columns, \ - const DataTypes & key_types, \ + const DataTypes &, \ const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) @@ -100,18 +100,18 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const ColumnString * const def, - ColumnString * const out) const; + void getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes &, + const ColumnString * const def, + ColumnString * const out) const; #define DECLARE(TYPE) \ void get##TYPE( \ const std::string & attribute_name, \ const Columns & key_columns, \ - const DataTypes & key_types, \ + const DataTypes &, \ const TYPE def, \ ResultArrayType & out) const; DECLARE(UInt8) @@ -163,7 +163,7 @@ private: size_t getAttributeIndex(const std::string & attribute_name) const; template - static T getNullValue(const Field & field) const; + static T getNullValue(const Field & field); template void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, 
DefaultGetter && get_default) const; From bd13a888f5026373ab4e3cadcd61f95ac2216e26 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:40:42 +0300 Subject: [PATCH 08/76] Compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index efa68099477..206671293ef 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -195,7 +195,7 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ { \ const auto ind = getAttributeIndex(attribute_name); \ - checkAttributeType(name, attribute_name, dict_struct.attributes[ind].type, AttributeUnderlyingType::ut##TYPE); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ \ const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ \ From c339acddf9ffe096f97b897612e49b5fea3ff4ea Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:41:35 +0300 Subject: [PATCH 09/76] Compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 206671293ef..10ed734c406 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -192,7 +192,7 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) #define DECLARE(TYPE) \ void IPolygonDictionary::get##TYPE( \ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes &, 
ResultArrayType & out) const \ { \ const auto ind = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ @@ -222,7 +222,7 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) #undef DECLARE void IPolygonDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const + const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); From 8160b2af6a883b982529889795898d86727ca437 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:43:12 +0300 Subject: [PATCH 10/76] Compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 10ed734c406..d8d251fcd83 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -145,8 +145,9 @@ void IPolygonDictionary::loadData() { void IPolygonDictionary::calculateBytesAllocated() { - for (const auto & block : blocks) - bytes_allocated += block.allocatedBytes(); + // TODO:: Account for key. 
+ for (const auto & column : attributes) + bytes_allocated += column->allocatedBytes(); } @@ -224,8 +225,6 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) void IPolygonDictionary::getString( const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const { - dict_struct.validateKeyTypes(key_types); - const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); From ff954afec34de172a0319b50c242c88dbec6cec7 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:52:44 +0300 Subject: [PATCH 11/76] Compilation errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index d8d251fcd83..1d0e7f821b8 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -391,6 +391,7 @@ IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & fie } else throw Exception{"Polygon is not represented by an array", ErrorCodes::TYPE_MISMATCH}; + bg::correct(result); return result; } From 5804261a87ac2084973bdc73d3a86d2beaf1afbd Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 16:58:57 +0300 Subject: [PATCH 12/76] Compilation errors --- dbms/src/Functions/FunctionsExternalDictionaries.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 2e93e5b1397..2cd116ef7af 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -316,6 +316,7 @@ private: !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, 
dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchRange(block, arguments, result, dict_ptr)) throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } From 6c7d5d58fbde180a99fb5a81bc8301700064fea1 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 17:38:42 +0300 Subject: [PATCH 13/76] Allow more dictGets --- dbms/src/Functions/FunctionsExternalDictionaries.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 2cd116ef7af..a8f1144718d 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -845,6 +845,7 @@ private: !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchRange(block, arguments, result, dict_ptr)) throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } From 179b93de35f9abba27015d8c7c08fc6bf15c681c Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 23 Dec 2019 17:59:50 +0300 Subject: [PATCH 14/76] Allow more dictGets --- dbms/src/Functions/FunctionsExternalDictionaries.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index a8f1144718d..940de1f754b 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -502,6 +502,7 @@ private: !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, 
result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } @@ -1109,6 +1110,7 @@ private: !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } From c34fca48c4047169a6e4a0bbe017f0358448e742 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 21:21:50 +0300 Subject: [PATCH 15/76] Small changes & comments --- dbms/src/Dictionaries/PolygonDictionary.cpp | 23 +++++---- dbms/src/Dictionaries/PolygonDictionary.h | 52 ++++++++++++++++----- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 1d0e7f821b8..bc0b7e4d793 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -108,7 +108,8 @@ void IPolygonDictionary::createAttributes() { } } -void IPolygonDictionary::blockToAttributes(const DB::Block &block) { +void IPolygonDictionary::blockToAttributes(const DB::Block &block) +{ const auto rows = block.rows(); element_count += rows; for (size_t i = 0; i < attributes.size(); ++i) { @@ -134,12 +135,12 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) { } } -void IPolygonDictionary::loadData() { +void IPolygonDictionary::loadData() +{ auto stream = source_ptr->loadAll(); stream->readPrefix(); - while (const auto block = stream->read()) { + while (const auto block = stream->read()) blockToAttributes(block); - } stream->readSuffix(); } @@ -178,7 +179,8 @@ void IPolygonDictionary::has(const Columns &key_columns, const 
DataTypes &, Padd } } -size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const { +size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const +{ const auto it = attribute_index_by_name.find(attribute_name); if (it == attribute_index_by_name.end()) throw Exception{"No such attribute: " + attribute_name, ErrorCodes::BAD_ARGUMENTS}; @@ -356,7 +358,7 @@ IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { auto coordinate_array = field.get(); if (coordinate_array.size() != DIM) - throw Exception{"All points should be two-dimensional", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR}; Float64 values[DIM]; for (size_t i = 0; i < DIM; ++i) { @@ -370,7 +372,8 @@ IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) throw Exception{"Point is not represented by an array", ErrorCodes::TYPE_MISMATCH}; } -IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & field) { +IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & field) +{ Polygon result; if (field.getType() == Field::Types::Array) { @@ -395,7 +398,9 @@ IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & fie return result; } -IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const Field &field) { +// TODO: Do this more efficiently by casting to the corresponding Column and avoiding Fields. 
+IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const Field &field) +{ MultiPolygon result; if (field.getType() == Field::Types::Array) { @@ -431,7 +436,7 @@ bool SimplePolygonDictionary::find(const Point &point, size_t & id) const { for (size_t i = 0; i < (this->polygons).size(); ++i) { - if (bg::within(point, (this->polygons)[i])) { + if (bg::covered_by(point, (this->polygons)[i])) { id = i; return true; } diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 1518f7003c4..018d834655e 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -56,6 +56,8 @@ public: template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + /** Functions used to retrieve attributes of specific type by key. */ + #define DECLARE(TYPE) \ void get##TYPE( \ const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType & out) const; @@ -130,14 +132,18 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const String & def, - ColumnString * const out) const; + void getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; - // TODO: Refactor design to perform stronger checks, i.e. make this an override. + /** Checks whether or not a point can be found in one of the polygons in the dictionary. + * The check is performed for multiple points represented by columns of their x and y coordinates. + * The boolean result is written to out. + */ + // TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override. 
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; protected: @@ -145,45 +151,69 @@ protected: using Polygon = bg::model::polygon; using MultiPolygon = bg::model::multi_polygon; - std::vector polygons; - + /** Returns true if the given point can be found in the polygon dictionary. + * If true id is set to the index of the first polygon containing the given point. + * Overridden in different implementations of this interface. + */ virtual bool find(const Point & point, size_t & id) const = 0; + std::vector polygons; + const std::string name; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; private: + /** Helper functions for loading the data from the configuration. + * The polygons serving as keys are extracted into boost types. + * All other values are stored in one column per attribute. + */ void createAttributes(); void blockToAttributes(const Block & block); void loadData(); void calculateBytesAllocated(); + /** Checks whether a given attribute exists and returns its index */ size_t getAttributeIndex(const std::string & attribute_name) const; + + /** Return the default type T value of field. */ template static T getNullValue(const Field & field); + /** Helper function for retrieving the value of an attribute by key. */ template void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; - - std::map attribute_index_by_name; + std::map attribute_index_by_name; Columns attributes; size_t bytes_allocated = 0; size_t element_count = 0; mutable std::atomic query_count{0}; + /** Extracts a list of points from two columns representing their x and y coordinates. */ static std::vector extractPoints(const Columns &key_columns); + + /** Converts an array containing two Float64s to a point. 
*/ static Point fieldToPoint(const Field & field); + + /** Converts an array of arrays of points to a polygon. The first array represents the outer ring and zero or more + * following arrays represent the rings that are excluded from the polygon. + */ static Polygon fieldToPolygon(const Field & field); + + /** Converts an array of polygons (see above) to a multi-polygon. */ static MultiPolygon fieldToMultiPolygon(const Field & field); + /** The number of dimensions used. Change with great caution. */ static constexpr size_t DIM = 2; }; +/** Simple implementation of the polygon dictionary. Doesn't generate anything on construction. + * Iterates over all stored polygons for each query, checking each of them in linear time. + */ class SimplePolygonDictionary : public IPolygonDictionary { public: From 8a55049ede7d395768a40bb4a9eaffd0bb7dc705 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 21:22:20 +0300 Subject: [PATCH 16/76] First test --- dbms/tests/polygon_dictionary.xml | 37 ++++++++++ .../0_stateless/01037_polygon_dict.sql | 70 +++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 dbms/tests/polygon_dictionary.xml create mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict.sql diff --git a/dbms/tests/polygon_dictionary.xml b/dbms/tests/polygon_dictionary.xml new file mode 100644 index 00000000000..86d7f81dc22 --- /dev/null +++ b/dbms/tests/polygon_dictionary.xml @@ -0,0 +1,37 @@ + + + polygons + + + localhost + 9000 + default + + test_01037 + polygons
+
+ + 0 + + + + + + + key + Array(Array(Array(Array(Float64)))) + + + + name + String + + + + u64 + UInt64 + 0 + + +
+
diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql new file mode 100644 index 00000000000..26aea5920aa --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -0,0 +1,70 @@ +-- Must use `test_00950` database and these tables - they're configured in dbms/tests/*_dictionary.xml +create database if not exists test_01037; +use test_01037; +drop table if exists polygons; + +create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; +insert into polygons values ( + [ + [ + [ + [1, 3], + [1, 1], + [3, 1], + [3, -1], + [1, -1], + [1, -3], + [-1. -3], + [-1, -1], + [-3, -1], + [-3, 1], + [-1, 1], + [-1, 3] + ] + ], + [ + [ + [5, 5], + [5, 1], + [7, 1], + [7, 7], + [1, 7], + [1, 5] + ] + ] + ], + 'Click', + 42); +insert into polygons values ( + [ + [ + [ + [5, 5], + [5, -5], + [-5, -5], + [-5, 5] + ], + [ + [1, 3], + [1, 1], + [3, 1], + [3, -1], + [1, -1], + [1, -3], + [-1. 
-3], + [-1, -1], + [-3, -1], + [-3, 1], + [-1, 1], + [-1, 3] + ] + ] + ], + 'House', + 314159); + +select 'dictGet', 'polygons' as dict_name, (0.0, 0.0) as key, + dictGet(dict_name, 'name', key), + dictGet(dict_name, 'u64', key); +drop table polygons; +drop database test_01037; From b6892466f2ae396dc4ddf75a8200d58cfdf5c5c3 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 21:43:18 +0300 Subject: [PATCH 17/76] Trying to fix test --- .../0_stateless/01037_polygon_dict.sql | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 26aea5920aa..3f1aa4139d3 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -4,10 +4,10 @@ use test_01037; drop table if exists polygons; create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; -insert into polygons values ( - [ - [ - [ +insert into polygons values (-- + [-- + [-- + [-- [1, 3], [1, 1], [3, 1], @@ -20,21 +20,22 @@ insert into polygons values ( [-3, 1], [-1, 1], [-1, 3] - ] + ]-- ], - [ - [ + [-- + [-- [5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5] - ] - ] + ]-- + ]-- ], 'Click', 42); +/* insert into polygons values ( [ [ @@ -62,7 +63,7 @@ insert into polygons values ( ], 'House', 314159); - +*/ select 'dictGet', 'polygons' as dict_name, (0.0, 0.0) as key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'u64', key); From 453ff6d321b1b3ede019d7ba860e371463d799bb Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 21:47:32 +0300 Subject: [PATCH 18/76] Trying to fix test --- .../0_stateless/01037_polygon_dict.sql | 30 +------------------ 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 
3f1aa4139d3..b403f14f673 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -4,35 +4,7 @@ use test_01037; drop table if exists polygons; create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; -insert into polygons values (-- - [-- - [-- - [-- - [1, 3], - [1, 1], - [3, 1], - [3, -1], - [1, -1], - [1, -3], - [-1. -3], - [-1, -1], - [-3, -1], - [-3, 1], - [-1, 1], - [-1, 3] - ]-- - ], - [-- - [-- - [5, 5], - [5, 1], - [7, 1], - [7, 7], - [1, 7], - [1, 5] - ]-- - ]-- - ], +insert into polygons values ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); /* From f7a086b9d99e377871c6abd743d5fe140f4504b7 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 21:48:48 +0300 Subject: [PATCH 19/76] Trying to fix test --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index b403f14f673..f36c68e602b 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -4,9 +4,7 @@ use test_01037; drop table if exists polygons; create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; -insert into polygons values ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], - 'Click', - 42); +insert into polygons values ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. 
-3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); /* insert into polygons values ( [ From 4f66c4bc7de47eac874ac4167b2012b2181fd119 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 24 Dec 2019 22:28:41 +0300 Subject: [PATCH 20/76] Trying to fix test --- .../0_stateless/01037_polygon_dict.sql | 32 ++----------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index f36c68e602b..edc935454c7 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -5,36 +5,8 @@ drop table if exists polygons; create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; insert into polygons values ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -/* -insert into polygons values ( - [ - [ - [ - [5, 5], - [5, -5], - [-5, -5], - [-5, 5] - ], - [ - [1, 3], - [1, 1], - [3, 1], - [3, -1], - [1, -1], - [1, -3], - [-1. -3], - [-1, -1], - [-3, -1], - [-3, 1], - [-1, 1], - [-1, 3] - ] - ] - ], - 'House', - 314159); -*/ -select 'dictGet', 'polygons' as dict_name, (0.0, 0.0) as key, +insert into polygons values ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. 
-3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +select 'dictGet', 'polygons' as dict_name, tuple(0.0, 0.0) as key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'u64', key); drop table polygons; From 7c1fa017bac10fcf5af27c79a7b9b318e4c6e8ab Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 15:05:37 +0300 Subject: [PATCH 21/76] Trying to fix test --- .../0_stateless/01037_polygon_dict.sql | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index edc935454c7..3bd8618a30a 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -1,13 +1,30 @@ --- Must use `test_00950` database and these tables - they're configured in dbms/tests/*_dictionary.xml -create database if not exists test_01037; -use test_01037; -drop table if exists polygons; +SET send_logs_level = 'none'; -create table polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; -insert into polygons values ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -insert into polygons values ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); -select 'dictGet', 'polygons' as dict_name, tuple(0.0, 0.0) as key, +DROP DATABASE IF EXISTS test_01037; + +CREATE DATABASE test_01037 Engine = Ordinary; + +DROP TABLE IF EXISTS test_01037.polygons; + +CREATE TABLE polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; +INSERT INTO polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. 
-3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); + +CREATE DICTIONARY test_01037.dict +( + polygon Array(Array(Array(Array(Float64)))), + name String DEFAULT '', + value UInt64 DEFAULT 42 +) +PRIMARY KEY polygon +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons' PASSWORD '' DB 'test_01037')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(POLYGON()) + +select 'dictGet', 'test_01037.dict' as dict_name, tuple(0.0, 0.0) as key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'u64', key); -drop table polygons; -drop database test_01037; + +DROP DICTIONARY IF EXISTS test_01037.dict; +DROP TABLE IF EXISTS test_01037.polygons; +DROP DATABASE IF EXISTS test_01037; From 21a35f0610678d916d598eddb81e98fac42a43c2 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 15:39:49 +0300 Subject: [PATCH 22/76] Trying to fix test --- .../0_stateless/01037_polygon_dict.sql | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 3bd8618a30a..9bc35e597eb 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -4,27 +4,34 @@ DROP DATABASE IF EXISTS test_01037; CREATE DATABASE test_01037 Engine = Ordinary; +DROP DICTIONARY IF EXISTS test_01037.dict; DROP TABLE IF EXISTS test_01037.polygons; -CREATE TABLE polygons (key Array(Array(Array(Array(Float64)))), name String, u64 UInt64) Engine = Memory; -INSERT INTO polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. 
-3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -INSERT INTO polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1. -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) Engine = Memory; +INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); CREATE DICTIONARY test_01037.dict ( - polygon Array(Array(Array(Array(Float64)))), + key Array(Array(Array(Array(Float64)))), name String DEFAULT '', value UInt64 DEFAULT 42 ) -PRIMARY KEY polygon +PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons' PASSWORD '' DB 'test_01037')) LIFETIME(MIN 1 MAX 10) -LAYOUT(POLYGON()) +LAYOUT(POLYGON()); -select 'dictGet', 'test_01037.dict' as dict_name, tuple(0.0, 0.0) as key, +DROP TABLE IF EXISTS test_01037.points; + +CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +INSERT INTO test_01037.points VALUES (0.0, 0.0), (3.0, 3.0), (5.0, 6.0), (-100.0, -42.0), (5.0, 5.0); + +select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'u64', key); + dictGet(dict_name, 'value', key) from test_01037.points; -DROP DICTIONARY IF EXISTS test_01037.dict; -DROP TABLE IF EXISTS test_01037.polygons; -DROP DATABASE IF EXISTS test_01037; +DROP DICTIONARY test_01037.dict; +DROP TABLE test_01037.polygons; +DROP TABLE test_01037.points; +DROP DATABASE test_01037; \ 
No newline at end of file From 30f4e4b7c5a276368bde62be4d31fadf1d6043ff Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 15:41:17 +0300 Subject: [PATCH 23/76] Trying to fix test --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 9bc35e597eb..d2c34725ae6 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -14,8 +14,8 @@ INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], CREATE DICTIONARY test_01037.dict ( key Array(Array(Array(Array(Float64)))), - name String DEFAULT '', - value UInt64 DEFAULT 42 + name String DEFAULT 'Default', + value UInt64 DEFAULT 101 ) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons' PASSWORD '' DB 'test_01037')) From 1637c8d0575b346a50a07ebeccfdb22c86b34ec5 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 15:58:43 +0300 Subject: [PATCH 24/76] Trying to fix test --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index d2c34725ae6..df65e1ad20f 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -14,7 +14,7 @@ INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], CREATE DICTIONARY test_01037.dict ( key Array(Array(Array(Array(Float64)))), - name String DEFAULT 'Default', + name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key From c284c1d728aeadb3e64c4bc0520e935ca31d4085 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 16:55:12 +0300 Subject: [PATCH 25/76] Fix after 
pull --- dbms/src/Dictionaries/registerDictionaries.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Dictionaries/registerDictionaries.h b/dbms/src/Dictionaries/registerDictionaries.h index 3f2e730b5e3..b0015e0820b 100644 --- a/dbms/src/Dictionaries/registerDictionaries.h +++ b/dbms/src/Dictionaries/registerDictionaries.h @@ -24,6 +24,7 @@ void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); +void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaries(); } From cd3277ff0a42bb7e38ab8c577d3f978d018eea9e Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 18:04:34 +0300 Subject: [PATCH 26/76] Fix default string use after free --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index bc0b7e4d793..5021894f56c 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -230,12 +230,12 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; + const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, [&](const size_t) { return null_value; }); } From fef22a7fd21a6a34730180d337f19b515d73c344 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 
2019 18:18:11 +0300 Subject: [PATCH 27/76] Fix default string use after free --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 5021894f56c..c4bc3a29231 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -230,12 +230,12 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); + const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t) { return null_value; }); } From dc8f201d4c52ba0ca5ba3b288cf0ba6912789367 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 18:27:04 +0300 Subject: [PATCH 28/76] Fix default string use after free --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index c4bc3a29231..94685026589 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -230,12 +230,12 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = StringRef{getNullValue(dict_struct.attributes[ind].null_value)}; + const auto & null_value = 
getNullValue(dict_struct.attributes[ind].null_value); - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t, const String value) { out->insertData(value.data(), value.size()); }, [&](const size_t) { return null_value; }); } From e1de174f4ccaa982095ce07d9569ce6aa6b3f076 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 19:31:57 +0300 Subject: [PATCH 29/76] More testing & fixes --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 94685026589..17c2d5e575b 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -235,7 +235,7 @@ void IPolygonDictionary::getString( getItemsImpl( ind, key_columns, - [&](const size_t, const String value) { out->insertData(value.data(), value.size()); }, + [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, [&](const size_t) { return null_value; }); } @@ -282,7 +282,7 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - getItemsImpl( + getItemsImpl( ind, key_columns, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, @@ -329,7 +329,7 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - getItemsImpl( + getItemsImpl( ind, key_columns, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, diff --git 
a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index df65e1ad20f..dadb38a2db2 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -24,12 +24,18 @@ LAYOUT(POLYGON()); DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.0, 0.0), (3.0, 3.0), (5.0, 6.0), (-100.0, -42.0), (5.0, 5.0); +CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (5.0, 5.0, 55, 'ee'); select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) from test_01037.points; +select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, 'www'), + dictGetOrDefault(dict_name, 'value', key, 1234) from test_01037.points; +select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, def_s), + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points; DROP DICTIONARY test_01037.dict; DROP TABLE test_01037.polygons; From 93b02f31912788c504b83af15c0eb2e2bd94e07c Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 19:44:33 +0300 Subject: [PATCH 30/76] Quick sql fix --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index dadb38a2db2..20edde7ff60 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -32,7 +32,7 @@ select 
'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'value', key) from test_01037.points; select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, 1234) from test_01037.points; + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points; select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points; From dcadccf95d67836278581777c284f09fbaed8fc7 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 19:48:09 +0300 Subject: [PATCH 31/76] Remove unused error codes --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 17c2d5e575b..a9c7a19e72b 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -9,10 +9,8 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; - extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; - extern const int UNKNOWN_TYPE; extern const int UNSUPPORTED_METHOD; } From 57debeb8035b543ede9ff0231e34de41c7ad35d4 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Wed, 25 Dec 2019 19:55:02 +0300 Subject: [PATCH 32/76] Add reference for test --- .../0_stateless/01037_polygon_dict.reference | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict.reference diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference new file mode 100644 index 00000000000..d773dd5ad76 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference @@ 
-0,0 +1,15 @@ +dictGet test_01037.dict (0,0) Click 42 +dictGet test_01037.dict (3,3) House 314159 +dictGet test_01037.dict (5,6) Click 42 +dictGet test_01037.dict (-100,-42) qqq 101 +dictGet test_01037.dict (5,5) Click 42 +dictGetOrDefault test_01037.dict (0,0) Click 42 +dictGetOrDefault test_01037.dict (3,3) House 314159 +dictGetOrDefault test_01037.dict (5,6) Click 42 +dictGetOrDefault test_01037.dict (-100,-42) www 1234 +dictGetOrDefault test_01037.dict (5,5) Click 42 +dictGetOrDefault test_01037.dict (0,0) Click 42 +dictGetOrDefault test_01037.dict (3,3) House 314159 +dictGetOrDefault test_01037.dict (5,6) Click 42 +dictGetOrDefault test_01037.dict (-100,-42) dd 44 +dictGetOrDefault test_01037.dict (5,5) Click 42 From 26927967341b1c966f483601e217785af9110977 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 20:03:24 +0300 Subject: [PATCH 33/76] Clean up old comments, remove an old approach to testing --- dbms/src/Dictionaries/DictionaryStructure.cpp | 8 +--- dbms/src/Dictionaries/DictionaryStructure.h | 3 +- dbms/tests/polygon_dictionary.xml | 37 ------------------- 3 files changed, 3 insertions(+), 45 deletions(-) delete mode 100644 dbms/tests/polygon_dictionary.xml diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index 9da9427976d..6ecc6bf936b 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -74,6 +74,8 @@ AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type) return AttributeUnderlyingType::utDecimal128; } + // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries. + // TODO: This should be fixed by fully supporting arrays in dictionaries. 
if (type.find("Array") == 0) return AttributeUnderlyingType::utString; @@ -115,8 +117,6 @@ std::string toString(const AttributeUnderlyingType type) return "Decimal128"; case AttributeUnderlyingType::utString: return "String"; - //case AttributeUnderlyingType::utArray: - // return "Array"; } throw Exception{"Unknown attribute_type " + toString(static_cast(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; @@ -248,12 +248,8 @@ bool DictionaryStructure::isKeySizeFixed() const return true; for (const auto & key_i : *key) - { if (key_i.underlying_type == AttributeUnderlyingType::utString) return false; - //if (key_i.underlying_type == AttributeUnderlyingType::utArray) - // return false; - } return true; } diff --git a/dbms/src/Dictionaries/DictionaryStructure.h b/dbms/src/Dictionaries/DictionaryStructure.h index d40360041ab..2893dea2e4f 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.h +++ b/dbms/src/Dictionaries/DictionaryStructure.h @@ -36,8 +36,7 @@ enum class AttributeUnderlyingType utDecimal32, utDecimal64, utDecimal128, - utString, - //utArray + utString }; diff --git a/dbms/tests/polygon_dictionary.xml b/dbms/tests/polygon_dictionary.xml deleted file mode 100644 index 86d7f81dc22..00000000000 --- a/dbms/tests/polygon_dictionary.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - polygons - - - localhost - 9000 - default - - test_01037 - polygons
-
- - 0 - - - - - - - key - Array(Array(Array(Array(Float64)))) - - - - name - String - - - - u64 - UInt64 - 0 - - -
-
From 35a3abcb0740081e7d7dda169ca80ded6b62c6d1 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 20:06:37 +0300 Subject: [PATCH 34/76] Add blank line deleted by accident --- dbms/src/Dictionaries/DictionaryStructure.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index 6ecc6bf936b..45506f246b4 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -250,6 +250,7 @@ bool DictionaryStructure::isKeySizeFixed() const for (const auto & key_i : *key) if (key_i.underlying_type == AttributeUnderlyingType::utString) return false; + return true; } From ca76e88545fe9a6726a242aabc6df8a44d2fdd29 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 20:32:02 +0300 Subject: [PATCH 35/76] Some more clean-up and comments --- dbms/src/Dictionaries/PolygonDictionary.cpp | 3 ++- dbms/src/Dictionaries/PolygonDictionary.h | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index a9c7a19e72b..caa2eff14f8 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -147,7 +147,6 @@ void IPolygonDictionary::calculateBytesAllocated() // TODO:: Account for key. 
for (const auto & column : attributes) bytes_allocated += column->allocatedBytes(); - } std::vector IPolygonDictionary::extractPoints(const Columns &key_columns) @@ -175,6 +174,8 @@ void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, Padd out[row] = find(pt, trash); ++row; } + + query_count.fetch_add(rows, std::memory_order_relaxed); } size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 018d834655e..09305487e5f 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -18,6 +18,11 @@ namespace DB namespace bg = boost::geometry; +/** An interface for polygon dictionaries. + * Polygons are read and stored as multi_polygons from boost::geometry in Euclidean coordinates. + * An implementation should inherit from this base class and preprocess the data upon construction if needed. + * It must override the find method of this class which retrieves the polygon containing a single point. + */ class IPolygonDictionary : public IDictionaryBase { public: @@ -147,12 +152,15 @@ public: void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; protected: + /** A simple two-dimensional point in Euclidean coordinates. */ using Point = bg::model::point; + /** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */ using Polygon = bg::model::polygon; + /** A multi_polygon in boost is a collection of polygons. */ using MultiPolygon = bg::model::multi_polygon; /** Returns true if the given point can be found in the polygon dictionary. - * If true id is set to the index of the first polygon containing the given point. + * If true id is set to the index of a polygon containing the given point. * Overridden in different implementations of this interface. 
*/ virtual bool find(const Point & point, size_t & id) const = 0; @@ -211,8 +219,9 @@ private: static constexpr size_t DIM = 2; }; -/** Simple implementation of the polygon dictionary. Doesn't generate anything on construction. +/** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction. * Iterates over all stored polygons for each query, checking each of them in linear time. + * Retrieves the first polygon in the dictionary containing a given point. */ class SimplePolygonDictionary : public IPolygonDictionary { From 9e85b507d0a0526e5c6a6de8841b977319d5e9f5 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 25 Dec 2019 21:49:27 +0300 Subject: [PATCH 36/76] Fix stupid typo --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index caa2eff14f8..f720de8bd45 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -175,7 +175,7 @@ void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, Padd ++row; } - query_count.fetch_add(rows, std::memory_order_relaxed); + query_count.fetch_add(row, std::memory_order_relaxed); } size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const From b2c2b756178221534a0b5a127fc940faecfaab59 Mon Sep 17 00:00:00 2001 From: Andrey Chulkov Date: Thu, 26 Dec 2019 01:02:55 +0300 Subject: [PATCH 37/76] Add blank line at the end of files --- dbms/src/Dictionaries/PolygonDictionary.cpp | 3 ++- dbms/src/Dictionaries/PolygonDictionary.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index f720de8bd45..f50065a1a04 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -463,4 +463,5 @@ void 
registerDictionaryPolygon(DictionaryFactory & factory) factory.registerLayout("polygon", create_layout, true); } -} \ No newline at end of file +} + diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 09305487e5f..54678d86b90 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -238,4 +238,5 @@ private: bool find(const Point & point, size_t & id) const override; }; -} \ No newline at end of file +} + From 24e92822f5afc72bb43e1ddefc28d044909e2bd9 Mon Sep 17 00:00:00 2001 From: Andrey Chulkov Date: Thu, 26 Dec 2019 11:19:03 +0300 Subject: [PATCH 38/76] Make test deterministic --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 20edde7ff60..e2d7fd31bbe 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -7,7 +7,7 @@ CREATE DATABASE test_01037 Engine = Ordinary; DROP DICTIONARY IF EXISTS test_01037.dict; DROP TABLE IF EXISTS test_01037.polygons; -CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) Engine = Memory; +CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ORDER BY name ENGINE = Memory; INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); @@ -40,4 +40,4 @@ select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, 
y) as key, DROP DICTIONARY test_01037.dict; DROP TABLE test_01037.polygons; DROP TABLE test_01037.points; -DROP DATABASE test_01037; \ No newline at end of file +DROP DATABASE test_01037; From 68c64f50dddb6ddff0632048f43f46db3debc919 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Thu, 26 Dec 2019 13:19:29 +0300 Subject: [PATCH 39/76] Fix sql query --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index e2d7fd31bbe..77e6428a493 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -7,7 +7,7 @@ CREATE DATABASE test_01037 Engine = Ordinary; DROP DICTIONARY IF EXISTS test_01037.dict; DROP TABLE IF EXISTS test_01037.polygons; -CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ORDER BY name ENGINE = Memory; +CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = MergeTree() ORDER BY name; INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); From 1bfa779d81118016eb2d7aebfaeffd82d082aa7b Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Thu, 26 Dec 2019 13:41:46 +0300 Subject: [PATCH 40/76] Remove indetermined behavior from test, add dictHas to test --- .../0_stateless/01037_polygon_dict.reference | 18 +++++++++++++++--- .../queries/0_stateless/01037_polygon_dict.sql | 8 ++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff 
--git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference index d773dd5ad76..7525071f5a0 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference @@ -2,14 +2,26 @@ dictGet test_01037.dict (0,0) Click 42 dictGet test_01037.dict (3,3) House 314159 dictGet test_01037.dict (5,6) Click 42 dictGet test_01037.dict (-100,-42) qqq 101 -dictGet test_01037.dict (5,5) Click 42 +dictGet test_01037.dict (7.01,7.01) qqq 101 +dictGet test_01037.dict (0.99,3) Click 42 dictGetOrDefault test_01037.dict (0,0) Click 42 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (-100,-42) www 1234 -dictGetOrDefault test_01037.dict (5,5) Click 42 +dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 +dictGetOrDefault test_01037.dict (0.99,3) Click 42 dictGetOrDefault test_01037.dict (0,0) Click 42 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (-100,-42) dd 44 -dictGetOrDefault test_01037.dict (5,5) Click 42 +dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 +dictGetOrDefault test_01037.dict (0.99,3) Click 42 +dictHas test_01037.dict (0,0) 1 +dictHas test_01037.dict (3,3) 1 +dictHas test_01037.dict (5,6) 1 +dictHas test_01037.dict (-100,-42) 0 +dictHas test_01037.dict (7.01,7.01) 0 +dictHas test_01037.dict (0.99,3) 1 +dictHas test_01037.dict (5,5) 1 +dictHas test_01037.dict (5,1) 1 +dictHas test_01037.dict (1,3) 1 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index 77e6428a493..a73859e9d4f 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -7,7 +7,7 @@ CREATE DATABASE test_01037 Engine = Ordinary; DROP DICTIONARY IF EXISTS 
test_01037.dict; DROP TABLE IF EXISTS test_01037.polygons; -CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = MergeTree() ORDER BY name; +CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); @@ -25,7 +25,7 @@ LAYOUT(POLYGON()); DROP TABLE IF EXISTS test_01037.points; CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (5.0, 5.0, 55, 'ee'); +INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (7.01, 7.01, 55, 'ee'), (0.99, 3.0, 0, ''); select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), @@ -37,6 +37,10 @@ select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points; +INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''), (5.0, 1.0, 0, ''), (1.0, 3.0, 0, ''); +select 'dictHas', 'test_01037.dict' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from test_01037.points; + DROP DICTIONARY test_01037.dict; DROP TABLE test_01037.polygons; DROP TABLE test_01037.points; From b3a19f588d07a4b9ef9e74d12c8923c5c3d35f72 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Thu, 26 Dec 
2019 15:27:43 +0300 Subject: [PATCH 41/76] Enforce a strict ordering of queries --- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index a73859e9d4f..bf1c3e848c3 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -29,17 +29,17 @@ INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points; + dictGet(dict_name, 'value', key) from test_01037.points order by x, y; select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points; + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points; + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''), (5.0, 1.0, 0, ''), (1.0, 3.0, 0, ''); select 'dictHas', 'test_01037.dict' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points; + dictHas(dict_name, key) from test_01037.points order by x, y; DROP DICTIONARY test_01037.dict; DROP TABLE test_01037.polygons; From 45f42ce50b29dfd698873d5b3b517b9a074af5e6 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 26 Dec 2019 16:23:04 +0300 Subject: [PATCH 42/76] Try some improvements --- dbms/src/Dictionaries/PolygonDictionary.cpp | 24 
+++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index f50065a1a04..064e8e428bc 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -231,10 +231,10 @@ void IPolygonDictionary::getString( const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, + [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); }, [&](const size_t) { return null_value; }); } @@ -341,16 +341,32 @@ void IPolygonDictionary::getItemsImpl( { const auto points = extractPoints(key_columns); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColType = std::conditional_t::value, ColumnString, ColVecType>; + const auto column = typeid_cast(attributes[attribute_ind].get()); + if (!column) + throw Exception{"An attribute should be a column of its type", ErrorCodes::LOGICAL_ERROR}; for (const auto i : ext::range(0, points.size())) { size_t id = 0; - auto found = find(points[i], id); - set_value(i, found ? 
static_cast((*attributes[attribute_ind])[id].get()) : get_default(i)); + const auto found = find(points[i], id); + if (!found) + { + const auto def = get_default(i); + set_value(i, static_cast(def)); + continue; + } + if constexpr (std::is_same::value) + set_value(i, static_cast(column->getDataAt(id))); + else + set_value(i, static_cast(column->getElement(id))); } query_count.fetch_add(points.size(), std::memory_order_relaxed); } + + IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { if (field.getType() == Field::Types::Array) From f6fd331156cb18de65a11b91f22ab077233629d7 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Thu, 26 Dec 2019 16:31:28 +0300 Subject: [PATCH 43/76] Update test reference --- .../0_stateless/01037_polygon_dict.reference | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference index 7525071f5a0..e5241418fd1 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference @@ -1,27 +1,27 @@ +dictGet test_01037.dict (-100,-42) qqq 101 dictGet test_01037.dict (0,0) Click 42 +dictGet test_01037.dict (0.99,3) Click 42 dictGet test_01037.dict (3,3) House 314159 dictGet test_01037.dict (5,6) Click 42 -dictGet test_01037.dict (-100,-42) qqq 101 dictGet test_01037.dict (7.01,7.01) qqq 101 -dictGet test_01037.dict (0.99,3) Click 42 -dictGetOrDefault test_01037.dict (0,0) Click 42 -dictGetOrDefault test_01037.dict (3,3) House 314159 -dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (-100,-42) www 1234 -dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict (0.99,3) Click 42 dictGetOrDefault test_01037.dict (0,0) Click 42 +dictGetOrDefault test_01037.dict (0.99,3) Click 42 dictGetOrDefault test_01037.dict (3,3) House 314159 
dictGetOrDefault test_01037.dict (5,6) Click 42 +dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 dictGetOrDefault test_01037.dict (-100,-42) dd 44 -dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 +dictGetOrDefault test_01037.dict (0,0) Click 42 dictGetOrDefault test_01037.dict (0.99,3) Click 42 -dictHas test_01037.dict (0,0) 1 -dictHas test_01037.dict (3,3) 1 -dictHas test_01037.dict (5,6) 1 +dictGetOrDefault test_01037.dict (3,3) House 314159 +dictGetOrDefault test_01037.dict (5,6) Click 42 +dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 dictHas test_01037.dict (-100,-42) 0 -dictHas test_01037.dict (7.01,7.01) 0 +dictHas test_01037.dict (0,0) 1 dictHas test_01037.dict (0.99,3) 1 -dictHas test_01037.dict (5,5) 1 -dictHas test_01037.dict (5,1) 1 dictHas test_01037.dict (1,3) 1 +dictHas test_01037.dict (3,3) 1 +dictHas test_01037.dict (5,1) 1 +dictHas test_01037.dict (5,5) 1 +dictHas test_01037.dict (5,6) 1 +dictHas test_01037.dict (7.01,7.01) 0 From 5a8d835faf0a739ac6f7579152931c26cff60a72 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 26 Dec 2019 16:23:04 +0300 Subject: [PATCH 44/76] Try some improvements --- dbms/src/Dictionaries/PolygonDictionary.cpp | 24 +++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index f50065a1a04..064e8e428bc 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -231,10 +231,10 @@ void IPolygonDictionary::getString( const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, + [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); }, [&](const size_t) { return null_value; }); } @@ -341,16 +341,32 @@ void IPolygonDictionary::getItemsImpl( { const auto 
points = extractPoints(key_columns); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColType = std::conditional_t::value, ColumnString, ColVecType>; + const auto column = typeid_cast(attributes[attribute_ind].get()); + if (!column) + throw Exception{"An attribute should be a column of its type", ErrorCodes::LOGICAL_ERROR}; for (const auto i : ext::range(0, points.size())) { size_t id = 0; - auto found = find(points[i], id); - set_value(i, found ? static_cast((*attributes[attribute_ind])[id].get()) : get_default(i)); + const auto found = find(points[i], id); + if (!found) + { + const auto def = get_default(i); + set_value(i, static_cast(def)); + continue; + } + if constexpr (std::is_same::value) + set_value(i, static_cast(column->getDataAt(id))); + else + set_value(i, static_cast(column->getElement(id))); } query_count.fetch_add(points.size(), std::memory_order_relaxed); } + + IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { if (field.getType() == Field::Types::Array) From 9e5c6af11d5b72a757cf684080aee94e8b998a84 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 26 Dec 2019 18:21:49 +0300 Subject: [PATCH 45/76] Trying more improvements around null_values --- dbms/src/Dictionaries/PolygonDictionary.cpp | 78 +++++++++++++++++---- dbms/src/Dictionaries/PolygonDictionary.h | 20 +++++- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 064e8e428bc..758cfcef2b6 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -94,13 +94,74 @@ BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_ throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; } +template +void IPolygonDictionary::appendNullValueImpl(const Field & null_value) +{ + null_values.emplace_back(T(null_value.get>())); +} + +void 
IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Field & null_value) +{ + switch (type) + { + case AttributeUnderlyingType::utUInt8: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt16: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt128: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt8: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt16: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utFloat32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utFloat64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal128: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utString: + appendNullValueImpl(null_value); + break; + } +} + void IPolygonDictionary::createAttributes() { attributes.resize(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { - attribute_index_by_name.emplace(dict_struct.attributes[i].name, i); + const auto & attr = dict_struct.attributes[i]; + attribute_index_by_name.emplace(attr.name, i); - if (dict_struct.attributes[i].hierarchical) + appendNullValue(attr.underlying_type, attr.null_value); + + if (attr.hierarchical) throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), 
ErrorCodes::TYPE_MISMATCH}; } @@ -186,12 +247,6 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) return it->second; } -template -T IPolygonDictionary::getNullValue(const DB::Field &field) -{ - return field.get>(); -} - #define DECLARE(TYPE) \ void IPolygonDictionary::get##TYPE( \ const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType & out) const \ @@ -199,7 +254,7 @@ T IPolygonDictionary::getNullValue(const DB::Field &field) const auto ind = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ \ - const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ + const auto null_value = std::get(null_values[ind]); \ \ getItemsImpl( \ ind, \ @@ -229,7 +284,7 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); + const auto & null_value = StringRef{std::get(null_values[ind])}; getItemsImpl( ind, @@ -352,8 +407,7 @@ void IPolygonDictionary::getItemsImpl( const auto found = find(points[i], id); if (!found) { - const auto def = get_default(i); - set_value(i, static_cast(def)); + set_value(i, static_cast(get_default(i))); continue; } if constexpr (std::is_same::value) diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 54678d86b90..c16b4e4d752 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -186,9 +186,9 @@ private: /** Checks whether a given attribute exists and returns its index */ size_t getAttributeIndex(const std::string & attribute_name) const; - /** Return the default type T value of field. 
*/ template - static T getNullValue(const Field & field); + void appendNullValueImpl(const Field & null_value); + void appendNullValue(AttributeUnderlyingType type, const Field & value); /** Helper function for retrieving the value of an attribute by key. */ template @@ -196,6 +196,22 @@ private: std::map attribute_index_by_name; Columns attributes; + std::vector> null_values; size_t bytes_allocated = 0; size_t element_count = 0; From 1fb0daf2097b0677ae4bd53df4d03e0c7892b4db Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 26 Dec 2019 16:23:04 +0300 Subject: [PATCH 46/76] Try some improvements --- dbms/src/Dictionaries/PolygonDictionary.cpp | 24 +++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index f50065a1a04..064e8e428bc 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -231,10 +231,10 @@ void IPolygonDictionary::getString( const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); - getItemsImpl( + getItemsImpl( ind, key_columns, - [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); }, + [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); }, [&](const size_t) { return null_value; }); } @@ -341,16 +341,32 @@ void IPolygonDictionary::getItemsImpl( { const auto points = extractPoints(key_columns); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColType = std::conditional_t::value, ColumnString, ColVecType>; + const auto column = typeid_cast(attributes[attribute_ind].get()); + if (!column) + throw Exception{"An attribute should be a column of its type", ErrorCodes::LOGICAL_ERROR}; for (const auto i : ext::range(0, points.size())) { size_t id = 0; - auto found = find(points[i], id); - set_value(i, found ? 
static_cast((*attributes[attribute_ind])[id].get()) : get_default(i)); + const auto found = find(points[i], id); + if (!found) + { + const auto def = get_default(i); + set_value(i, static_cast(def)); + continue; + } + if constexpr (std::is_same::value) + set_value(i, static_cast(column->getDataAt(id))); + else + set_value(i, static_cast(column->getElement(id))); } query_count.fetch_add(points.size(), std::memory_order_relaxed); } + + IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { if (field.getType() == Field::Types::Array) From 67b7cafd456be1fba4ef2cea803533e936ca53ee Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 26 Dec 2019 18:21:49 +0300 Subject: [PATCH 47/76] Trying more improvements around null_values --- dbms/src/Dictionaries/PolygonDictionary.cpp | 78 +++++++++++++++++---- dbms/src/Dictionaries/PolygonDictionary.h | 20 +++++- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 064e8e428bc..758cfcef2b6 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -94,13 +94,74 @@ BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_ throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; } +template +void IPolygonDictionary::appendNullValueImpl(const Field & null_value) +{ + null_values.emplace_back(T(null_value.get>())); +} + +void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Field & null_value) +{ + switch (type) + { + case AttributeUnderlyingType::utUInt8: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt16: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utUInt64: + appendNullValueImpl(null_value); + break; + case 
AttributeUnderlyingType::utUInt128: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt8: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt16: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utInt64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utFloat32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utFloat64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal32: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal64: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utDecimal128: + appendNullValueImpl(null_value); + break; + case AttributeUnderlyingType::utString: + appendNullValueImpl(null_value); + break; + } +} + void IPolygonDictionary::createAttributes() { attributes.resize(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { - attribute_index_by_name.emplace(dict_struct.attributes[i].name, i); + const auto & attr = dict_struct.attributes[i]; + attribute_index_by_name.emplace(attr.name, i); - if (dict_struct.attributes[i].hierarchical) + appendNullValue(attr.underlying_type, attr.null_value); + + if (attr.hierarchical) throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; } @@ -186,12 +247,6 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) return it->second; } -template -T IPolygonDictionary::getNullValue(const DB::Field &field) -{ - return field.get>(); -} - #define DECLARE(TYPE) \ void IPolygonDictionary::get##TYPE( \ const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType & out) const \ @@ -199,7 +254,7 @@ T 
IPolygonDictionary::getNullValue(const DB::Field &field) const auto ind = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \ \ - const auto null_value = getNullValue(dict_struct.attributes[ind].null_value); \ + const auto null_value = std::get(null_values[ind]); \ \ getItemsImpl( \ ind, \ @@ -229,7 +284,7 @@ void IPolygonDictionary::getString( const auto ind = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString); - const auto & null_value = getNullValue(dict_struct.attributes[ind].null_value); + const auto & null_value = StringRef{std::get(null_values[ind])}; getItemsImpl( ind, @@ -352,8 +407,7 @@ void IPolygonDictionary::getItemsImpl( const auto found = find(points[i], id); if (!found) { - const auto def = get_default(i); - set_value(i, static_cast(def)); + set_value(i, static_cast(get_default(i))); continue; } if constexpr (std::is_same::value) diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 54678d86b90..c16b4e4d752 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -186,9 +186,9 @@ private: /** Checks whether a given attribute exists and returns its index */ size_t getAttributeIndex(const std::string & attribute_name) const; - /** Return the default type T value of field. */ template - static T getNullValue(const Field & field); + void appendNullValueImpl(const Field & null_value); + void appendNullValue(AttributeUnderlyingType type, const Field & value); /** Helper function for retrieving the value of an attribute by key. 
*/ template @@ -196,6 +196,22 @@ private: std::map attribute_index_by_name; Columns attributes; + std::vector> null_values; size_t bytes_allocated = 0; size_t element_count = 0; From ce260e721254ac889dc63e5b3f845d03d0ef6896 Mon Sep 17 00:00:00 2001 From: Andrey Chulkov Date: Fri, 27 Dec 2019 13:57:32 +0300 Subject: [PATCH 48/76] Fix style errors --- dbms/src/Dictionaries/PolygonDictionary.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index f50065a1a04..f8fb36f543f 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -89,12 +89,14 @@ bool IPolygonDictionary::isInjective(const std::string &) const return false; } -BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const { +BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const +{ // TODO: Better error message. 
throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; } -void IPolygonDictionary::createAttributes() { +void IPolygonDictionary::createAttributes() +{ attributes.resize(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { @@ -110,7 +112,8 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) { const auto rows = block.rows(); element_count += rows; - for (size_t i = 0; i < attributes.size(); ++i) { + for (size_t i = 0; i < attributes.size(); ++i) + { const auto & column = block.safeGetByPosition(i + 1); if (attributes[i]) { @@ -165,7 +168,8 @@ std::vector IPolygonDictionary::extractPoints(const C return result; } -void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, PaddedPODArray &out) const { +void IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, PaddedPODArray &out) const +{ size_t row = 0; for (const auto & pt : extractPoints(key_columns)) { @@ -384,7 +388,8 @@ IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & fie throw Exception{"Outer polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; for (const auto & point : ring_array[0].get()) bg::append(result.outer(), fieldToPoint(point)); - for (size_t i = 0; i < result.inners().size(); ++i) { + for (size_t i = 0; i < result.inners().size(); ++i) + { if (ring_array[i + 1].getType() != Field::Types::Array) throw Exception{"Inner polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; for (const auto & point : ring_array[i + 1].get()) @@ -435,7 +440,8 @@ bool SimplePolygonDictionary::find(const Point &point, size_t & id) const { for (size_t i = 0; i < (this->polygons).size(); ++i) { - if (bg::covered_by(point, (this->polygons)[i])) { + if (bg::covered_by(point, (this->polygons)[i])) + { id = i; return true; } From 69b1bcdf2d71d17ec721ef80d168f91cd550a6d5 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Fri, 27 Dec 
2019 15:23:42 +0300 Subject: [PATCH 49/76] One more stupid bug --- .../queries/0_stateless/01037_polygon_dict.reference | 8 ++++---- dbms/tests/queries/0_stateless/01037_polygon_dict.sql | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference index e5241418fd1..66171de2526 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference @@ -1,24 +1,24 @@ dictGet test_01037.dict (-100,-42) qqq 101 dictGet test_01037.dict (0,0) Click 42 -dictGet test_01037.dict (0.99,3) Click 42 +dictGet test_01037.dict (0.99,2.99) Click 42 dictGet test_01037.dict (3,3) House 314159 dictGet test_01037.dict (5,6) Click 42 dictGet test_01037.dict (7.01,7.01) qqq 101 dictGetOrDefault test_01037.dict (-100,-42) www 1234 dictGetOrDefault test_01037.dict (0,0) Click 42 -dictGetOrDefault test_01037.dict (0.99,3) Click 42 +dictGetOrDefault test_01037.dict (0.99,2.99) Click 42 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 dictGetOrDefault test_01037.dict (-100,-42) dd 44 dictGetOrDefault test_01037.dict (0,0) Click 42 -dictGetOrDefault test_01037.dict (0.99,3) Click 42 +dictGetOrDefault test_01037.dict (0.99,2.99) Click 42 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 dictHas test_01037.dict (-100,-42) 0 dictHas test_01037.dict (0,0) 1 -dictHas test_01037.dict (0.99,3) 1 +dictHas test_01037.dict (0.99,2.99) 1 dictHas test_01037.dict (1,3) 1 dictHas test_01037.dict (3,3) 1 dictHas test_01037.dict (5,1) 1 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index bf1c3e848c3..c99f76100f5 100644 --- 
a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -25,7 +25,7 @@ LAYOUT(POLYGON()); DROP TABLE IF EXISTS test_01037.points; CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (7.01, 7.01, 55, 'ee'), (0.99, 3.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (7.01, 7.01, 55, 'ee'), (0.99, 2.99, 66, 'ee'); select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), From 288005162ea70e07c08642f939768f66be40395c Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Fri, 27 Dec 2019 16:06:03 +0300 Subject: [PATCH 50/76] Add some comments --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 ++++-- dbms/src/Dictionaries/PolygonDictionary.h | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 758cfcef2b6..f53ba578245 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -90,7 +90,8 @@ bool IPolygonDictionary::isInjective(const std::string &) const } BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const { - // TODO: Better error message. + // TODO: In order for this to work one would first have to support retrieving arrays from dictionaries. + // I believe this is a separate task done by some other people. 
throw Exception{"Reading the dictionary is not allowed", ErrorCodes::UNSUPPORTED_METHOD}; } @@ -152,7 +153,8 @@ void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Fie } } -void IPolygonDictionary::createAttributes() { +void IPolygonDictionary::createAttributes() +{ attributes.resize(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index c16b4e4d752..7c7ab9a57b2 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -186,6 +186,9 @@ private: /** Checks whether a given attribute exists and returns its index */ size_t getAttributeIndex(const std::string & attribute_name) const; + /** Helper functions to retrieve and instantiate the provided null value of an attribute. + * Since a null value is obligatory for every attribute they are simply appended to null_values defined below. + */ template void appendNullValueImpl(const Field & null_value); void appendNullValue(AttributeUnderlyingType type, const Field & value); @@ -194,8 +197,11 @@ private: template void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; + /** A mapping from the names of the attributes to their index in the two vectors defined below. */ std::map attribute_index_by_name; + /** A vector of columns storing the values of each attribute. */ Columns attributes; + /** A vector of null values corresponding to each attribute. 
*/ std::vector Date: Fri, 27 Dec 2019 17:22:51 +0300 Subject: [PATCH 51/76] Fix style error --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 459f88ebf4e..be070d26345 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -151,7 +151,7 @@ void IPolygonDictionary::appendNullValue(AttributeUnderlyingType type, const Fie case AttributeUnderlyingType::utString: appendNullValueImpl(null_value); break; - } + } } void IPolygonDictionary::createAttributes() From bc5edee8cf40f6da65af19c0d099a1ecf98a48a1 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 17:40:34 +0300 Subject: [PATCH 52/76] Support new methods added in recent pull requests --- dbms/src/Dictionaries/PolygonDictionary.cpp | 65 +++++++++++++++++++-- dbms/src/Dictionaries/PolygonDictionary.h | 9 ++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index be070d26345..bd03147f7d6 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -1,4 +1,5 @@ #include +#include #include "PolygonDictionary.h" #include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" @@ -16,11 +17,14 @@ namespace ErrorCodes IPolygonDictionary::IPolygonDictionary( + const std::string & database_, const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_) - : name(name_) + : database(database_) + , name(name_) + , full_name{database_.empty() ? name_ : (database_ + "." 
+ name_)} , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) @@ -29,11 +33,21 @@ IPolygonDictionary::IPolygonDictionary( loadData(); } -std::string IPolygonDictionary::getName() const +const std::string & IPolygonDictionary::getDatabase() const +{ + return database; +} + +const std::string & IPolygonDictionary::getName() const { return name; } +const std::string & IPolygonDictionary::getFullName() const +{ + return full_name; +} + std::string IPolygonDictionary::getTypeName() const { return "Polygon"; @@ -424,7 +438,47 @@ void IPolygonDictionary::getItemsImpl( query_count.fetch_add(points.size(), std::memory_order_relaxed); } +void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) { + const auto ptr_multi_polygons = typeid_cast(column.get()); + if (!ptr_multi_polygons) + throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; + const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); + if (!ptr_polygons) + throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; + const auto polygons = std::move(ptr_multi_polygons->getOffsets()); + + const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); + if (!ptr_rings) + throw Exceptions{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; + const auto rings = std::move(ptr_polygons->getOffsets()); + + const auto ptr_points = typeid_cast(&ptr_rings->getData()); + if (!ptr_points) + throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; + const auto points = std::move(ptr_rings->getOffsets()); + + const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); + if (!ptr_coord) + throw Exception{"Expected a column containing Float64s when reading coordinates", ErrorCodes::TYPE_MISMATCH}; + const auto coordinates = 
std::move(ptr_points->getOffsets()); + + IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; + dest.emplace_back(); + for (size_t i = 0; i < coordinates.size(); ++i) + { + if (polygons[polygon_offset] == 0) + { + dest.emplace_back(); + ++polygon_offset; + } + + if (coordinates[i] - (i == 0 ? 0 : coordinates[i - 1]) != DIM) + throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR}; + Point pt(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); + + } +} IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { @@ -522,7 +576,7 @@ bool SimplePolygonDictionary::find(const Point &point, size_t & id) const void registerDictionaryPolygon(DictionaryFactory & factory) { - auto create_layout = [=](const std::string & name, + auto create_layout = [=](const std::string &, const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, @@ -534,8 +588,11 @@ void registerDictionaryPolygon(DictionaryFactory & factory) + ": elements .structure.range_min and .structure.range_max should be defined only " "for a dictionary of layout 'range_hashed'", ErrorCodes::BAD_ARGUMENTS}; + + const String database = config.getString(config_prefix + ".database", ""); + const String name = config.getString(config_prefix + ".name"); const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(database, name, dict_struct, std::move(source_ptr), dict_lifetime); }; factory.registerLayout("polygon", create_layout, true); } diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 7c7ab9a57b2..a2c017ff9f3 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -27,12 +27,15 @@ class IPolygonDictionary : public 
IDictionaryBase { public: IPolygonDictionary( + const std::string & database_, const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, DictionaryLifetime dict_lifetime_); - std::string getName() const override; + const std::string & getDatabase() const override; + const std::string & getName() const override; + const std::string & getFullName() const override; std::string getTypeName() const override; @@ -167,7 +170,9 @@ protected: std::vector polygons; + const std::string database; const std::string name; + const std::string full_name; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; @@ -222,6 +227,8 @@ private: size_t bytes_allocated = 0; size_t element_count = 0; mutable std::atomic query_count{0}; + + static void extractMultiPolygons(const ColumnPtr & column, std::vector & dest); /** Extracts a list of points from two columns representing their x and y coordinates. */ static std::vector extractPoints(const Columns &key_columns); From c12727bfb591fe7cda7aa4510a6b3cc77213bbca Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 17:42:16 +0300 Subject: [PATCH 53/76] Support new methods added in recent pull requests 2.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 3 ++- dbms/src/Dictionaries/PolygonDictionary.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index bd03147f7d6..d76584dc737 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -544,11 +544,12 @@ IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const F } SimplePolygonDictionary::SimplePolygonDictionary( + const std::string & database_, const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_) - : 
IPolygonDictionary(name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) + : IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) { } diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index a2c017ff9f3..87841441ae4 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -256,6 +256,7 @@ class SimplePolygonDictionary : public IPolygonDictionary { public: SimplePolygonDictionary( + const std::string & database_, const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, From 9a213f79ed59669acd6f497df0916f2dd62689ff Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 17:59:21 +0300 Subject: [PATCH 54/76] Support new methods added in recent pull requests 3.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index d76584dc737..35baca54d27 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -446,22 +446,22 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; - const auto polygons = std::move(ptr_multi_polygons->getOffsets()); + const auto & polygons = ptr_multi_polygons->getOffsets(); const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) - throw Exceptions{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; - const auto rings = std::move(ptr_polygons->getOffsets()); + throw Exception{"Expected a column containing arrays of points when reading rings", 
ErrorCodes::TYPE_MISMATCH}; + const auto & rings = ptr_polygons->getOffsets(); const auto ptr_points = typeid_cast(&ptr_rings->getData()); if (!ptr_points) throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; - const auto points = std::move(ptr_rings->getOffsets()); + const auto & points = ptr_rings->getOffsets(); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); if (!ptr_coord) throw Exception{"Expected a column containing Float64s when reading coordinates", ErrorCodes::TYPE_MISMATCH}; - const auto coordinates = std::move(ptr_points->getOffsets()); + const auto & coordinates = ptr_points->getOffsets(); IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; dest.emplace_back(); @@ -476,7 +476,6 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect if (coordinates[i] - (i == 0 ? 0 : coordinates[i - 1]) != DIM) throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR}; Point pt(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); - } } @@ -556,6 +555,7 @@ SimplePolygonDictionary::SimplePolygonDictionary( std::shared_ptr SimplePolygonDictionary::clone() const { return std::make_shared( + this->database, this->name, this->dict_struct, this->source_ptr->clone(), From c3ddbf8ab93c4c629df4887c9783900b1bc2187a Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 21:32:47 +0300 Subject: [PATCH 55/76] Implementing faster parsing of polygons 1.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 66 +++++++++++++++------ 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 35baca54d27..7901807ceab 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -11,7 +11,6 @@ namespace ErrorCodes { extern const int TYPE_MISMATCH; extern 
const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; extern const int UNSUPPORTED_METHOD; } @@ -419,7 +418,7 @@ void IPolygonDictionary::getItemsImpl( using ColType = std::conditional_t::value, ColumnString, ColVecType>; const auto column = typeid_cast(attributes[attribute_ind].get()); if (!column) - throw Exception{"An attribute should be a column of its type", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"An attribute should be a column of its type", ErrorCodes::BAD_ARGUMENTS}; for (const auto i : ext::range(0, points.size())) { size_t id = 0; @@ -438,6 +437,17 @@ void IPolygonDictionary::getItemsImpl( query_count.fetch_add(points.size(), std::memory_order_relaxed); } +namespace +{ + +inline void makeDifferences(IColumn::Offsets & values) +{ + for (size_t i = 1; i < values.size(); ++i) + values[i] -= values[i - 1]; +} + +} + void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) { const auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) @@ -446,36 +456,57 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; - const auto & polygons = ptr_multi_polygons->getOffsets(); + IColumn::Offsets & polygons = ptr_multi_polygons->getOffsets(); const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) throw Exception{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; - const auto & rings = ptr_polygons->getOffsets(); + IColumn::Offsets & rings = ptr_polygons->getOffsets(); const auto ptr_points = typeid_cast(&ptr_rings->getData()); if (!ptr_points) throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; - const auto & points = ptr_rings->getOffsets(); + 
IColumn::Offsets & points = ptr_rings->getOffsets(); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); if (!ptr_coord) throw Exception{"Expected a column containing Float64s when reading coordinates", ErrorCodes::TYPE_MISMATCH}; const auto & coordinates = ptr_points->getOffsets(); - + makeDifferences(polygons), makeDifferences(rings), makeDifferences(points); IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; dest.emplace_back(); for (size_t i = 0; i < coordinates.size(); ++i) { - if (polygons[polygon_offset] == 0) - { - dest.emplace_back(); - ++polygon_offset; - } if (coordinates[i] - (i == 0 ? 0 : coordinates[i - 1]) != DIM) - throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR}; - Point pt(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); + throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::BAD_ARGUMENTS}; + if (points[point_offset] == 0) + { + ++point_offset; + if (!dest.back().back().outer().empty()) + dest.back().back().inners().emplace_back(); + if (rings[ring_offset] == 0) + { + ++ring_offset; + dest.back().emplace_back(); + if (polygons[polygon_offset] == 0) + { + dest.emplace_back(); + ++polygon_offset; + } + if (polygon_offset == polygons.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + --polygons[polygon_offset]; + } + if (ring_offset == rings.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + --rings[ring_offset]; + } + if (point_offset == points.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + --points[point_offset]; + auto & ring = (dest.back().back().inners().empty() ? 
dest.back().back().outer() : dest.back().back().inners().back()); + ring.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); } } @@ -485,7 +516,7 @@ IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) { auto coordinate_array = field.get(); if (coordinate_array.size() != DIM) - throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::BAD_ARGUMENTS}; Float64 values[DIM]; for (size_t i = 0; i < DIM; ++i) { @@ -506,7 +537,7 @@ IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & fie { const auto & ring_array = field.get(); if (ring_array.empty()) - throw Exception{"Empty polygons are not allowed", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"Empty polygons are not allowed", ErrorCodes::BAD_ARGUMENTS}; result.inners().resize(ring_array.size() - 1); if (ring_array[0].getType() != Field::Types::Array) throw Exception{"Outer polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; @@ -583,6 +614,9 @@ void registerDictionaryPolygon(DictionaryFactory & factory) const std::string & config_prefix, DictionarySourcePtr source_ptr) -> DictionaryPtr { + const String database = config.getString(config_prefix + ".database", ""); + const String name = config.getString(config_prefix + ".name"); + // TODO: Check that there is only one key and it is of the correct type. 
if (dict_struct.range_min || dict_struct.range_max) throw Exception{name @@ -590,8 +624,6 @@ void registerDictionaryPolygon(DictionaryFactory & factory) "for a dictionary of layout 'range_hashed'", ErrorCodes::BAD_ARGUMENTS}; - const String database = config.getString(config_prefix + ".database", ""); - const String name = config.getString(config_prefix + ".name"); const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(database, name, dict_struct, std::move(source_ptr), dict_lifetime); }; From 4da1fffc06b4a497c4e013e5d67d19c62c097907 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 21:36:25 +0300 Subject: [PATCH 56/76] Implementing faster parsing of polygons 2.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 7901807ceab..9b1d05491c1 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -449,21 +449,21 @@ inline void makeDifferences(IColumn::Offsets & values) } void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) { - const auto ptr_multi_polygons = typeid_cast(column.get()); + auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; - const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); + auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; IColumn::Offsets & polygons = ptr_multi_polygons->getOffsets(); - const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); + auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) throw Exception{"Expected a column 
containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; IColumn::Offsets & rings = ptr_polygons->getOffsets(); - const auto ptr_points = typeid_cast(&ptr_rings->getData()); + auto ptr_points = typeid_cast(&ptr_rings->getData()); if (!ptr_points) throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; IColumn::Offsets & points = ptr_rings->getOffsets(); From 32d4798872376d36b9bf8cb3028b725b380afc6d Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 21:46:37 +0300 Subject: [PATCH 57/76] Implementing faster parsing of polygons 3.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 9b1d05491c1..c286e58b37b 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -449,24 +449,25 @@ inline void makeDifferences(IColumn::Offsets & values) } void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) { - auto ptr_multi_polygons = typeid_cast(column.get()); + IColumn::Offsets polygons, rings, points; + const auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; - auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); + const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; - IColumn::Offsets & polygons = ptr_multi_polygons->getOffsets(); + polygons.assign(ptr_multi_polygons->getOffsets()); - auto ptr_rings = typeid_cast(&ptr_polygons->getData()); + const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) throw Exception{"Expected a 
column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; - IColumn::Offsets & rings = ptr_polygons->getOffsets(); + rings.assign(ptr_polygons->getOffsets()); - auto ptr_points = typeid_cast(&ptr_rings->getData()); + const auto ptr_points = typeid_cast(&ptr_rings->getData()); if (!ptr_points) throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; - IColumn::Offsets & points = ptr_rings->getOffsets(); + points.assign(tr_rings->getOffsets()); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); if (!ptr_coord) From e852d818e47e7a64aec9b944eccfb1814a7ccce6 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 21:48:31 +0300 Subject: [PATCH 58/76] Implementing faster parsing of polygons 4.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index c286e58b37b..22820b118dc 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -467,7 +467,7 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect const auto ptr_points = typeid_cast(&ptr_rings->getData()); if (!ptr_points) throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; - points.assign(tr_rings->getOffsets()); + points.assign(ptr_rings->getOffsets()); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); if (!ptr_coord) From f2a942200a4358cb2d97fef104ea6f9f7d0545b1 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 22:08:17 +0300 Subject: [PATCH 59/76] Implementing faster parsing of polygons 5.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp 
index 22820b118dc..45ff4170532 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -203,12 +203,14 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) const auto & key = block.safeGetByPosition(0).column; - for (const auto row : ext::range(0, rows)) + /*for (const auto row : ext::range(0, rows)) { const auto & field = (*key)[row]; // TODO: Get data more efficiently using polygons.push_back(fieldToMultiPolygon(field)); - } + }*/ + + extractMultiPolygons(key, polygons); } void IPolygonDictionary::loadData() From 510ddf21abfb838534e50c4c05e82f4be608b062 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 14 Jan 2020 23:26:56 +0300 Subject: [PATCH 60/76] Implementing faster parsing of polygons 6.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 45ff4170532..0f2c06890a6 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -478,6 +478,7 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect makeDifferences(polygons), makeDifferences(rings), makeDifferences(points); IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; dest.emplace_back(); + dest.back().emplace_back(); for (size_t i = 0; i < coordinates.size(); ++i) { @@ -486,24 +487,26 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect if (points[point_offset] == 0) { ++point_offset; - if (!dest.back().back().outer().empty()) - dest.back().back().inners().emplace_back(); + --rings[ring_offset]; if (rings[ring_offset] == 0) { ++ring_offset; - dest.back().emplace_back(); + if (ring_offset == rings.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + --polygons[polygon_offset]; if 
(polygons[polygon_offset] == 0) { dest.emplace_back(); ++polygon_offset; + if (polygon_offset == polygons.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; } - if (polygon_offset == polygons.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - --polygons[polygon_offset]; + else + dest.back().emplace_back(); } - if (ring_offset == rings.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - --rings[ring_offset]; + else + if (!dest.back().back().outer().empty()) + dest.back().back().inners().emplace_back(); } if (point_offset == points.size()) throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; @@ -511,6 +514,9 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect auto & ring = (dest.back().back().inners().empty() ? dest.back().back().outer() : dest.back().back().inners().back()); ring.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); } + + for (auto & multi_polygon : dest) + bg::correct(multi_polygon); } IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) From bf83e6690542861eba470e40482715637e6c7691 Mon Sep 17 00:00:00 2001 From: Andrey Chulkov Date: Wed, 15 Jan 2020 13:45:04 +0300 Subject: [PATCH 61/76] Fix clang compilation error --- dbms/src/Dictionaries/PolygonDictionary.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 0f2c06890a6..aa61e3e5918 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -475,7 +475,9 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect if (!ptr_coord) throw Exception{"Expected a column containing Float64s when reading coordinates", ErrorCodes::TYPE_MISMATCH}; const auto & coordinates = ptr_points->getOffsets(); - 
makeDifferences(polygons), makeDifferences(rings), makeDifferences(points); + makeDifferences(polygons); + makeDifferences(rings); + makeDifferences(points); IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; dest.emplace_back(); dest.back().emplace_back(); From 45d3537b99a33b9b7d4dd942e2b5120b53ddf2de Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Wed, 15 Jan 2020 16:28:18 +0300 Subject: [PATCH 62/76] Slight modifications for simple algorithm, improved test --- dbms/src/Dictionaries/PolygonDictionary.cpp | 16 +++++-- .../0_stateless/01037_polygon_dict.reference | 44 ++++++++++++++++--- .../0_stateless/01037_polygon_dict.sql | 27 +++++++++++- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index aa61e3e5918..47f048259c1 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -450,7 +450,8 @@ inline void makeDifferences(IColumn::Offsets & values) } -void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) { +void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) +{ IColumn::Offsets polygons, rings, points; const auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) @@ -606,15 +607,22 @@ std::shared_ptr SimplePolygonDictionary::clone() const bool SimplePolygonDictionary::find(const Point &point, size_t & id) const { + bool found = false; + double area = 0; for (size_t i = 0; i < (this->polygons).size(); ++i) { if (bg::covered_by(point, (this->polygons)[i])) { - id = i; - return true; + double new_area = bg::area((this->polygons)[i]); + if (!found || new_area < area) + { + found = true; + id = i; + area = new_area; + } } } - return false; + return found; } void registerDictionaryPolygon(DictionaryFactory & factory) diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference 
b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference index 66171de2526..b296fa1cc38 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference @@ -1,24 +1,56 @@ dictGet test_01037.dict (-100,-42) qqq 101 -dictGet test_01037.dict (0,0) Click 42 -dictGet test_01037.dict (0.99,2.99) Click 42 +dictGet test_01037.dict (-1,0) Click South 423 +dictGet test_01037.dict (-0.1,0) Click South 423 +dictGet test_01037.dict (0,-2) Click West 424 +dictGet test_01037.dict (0,-1.1) Click West 424 +dictGet test_01037.dict (0,1.1) Click North 422 +dictGet test_01037.dict (0,2) Click North 422 +dictGet test_01037.dict (0.1,0) Click East 421 +dictGet test_01037.dict (0.99,2.99) Click North 422 +dictGet test_01037.dict (1,0) Click East 421 dictGet test_01037.dict (3,3) House 314159 dictGet test_01037.dict (5,6) Click 42 dictGet test_01037.dict (7.01,7.01) qqq 101 dictGetOrDefault test_01037.dict (-100,-42) www 1234 -dictGetOrDefault test_01037.dict (0,0) Click 42 -dictGetOrDefault test_01037.dict (0.99,2.99) Click 42 +dictGetOrDefault test_01037.dict (-1,0) Click South 423 +dictGetOrDefault test_01037.dict (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict (0,-2) Click West 424 +dictGetOrDefault test_01037.dict (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict (0,2) Click North 422 +dictGetOrDefault test_01037.dict (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict (1,0) Click East 421 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 dictGetOrDefault test_01037.dict (-100,-42) dd 44 -dictGetOrDefault test_01037.dict (0,0) Click 42 -dictGetOrDefault test_01037.dict (0.99,2.99) Click 42 +dictGetOrDefault test_01037.dict (-1,0) Click South 423 +dictGetOrDefault 
test_01037.dict (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict (0,-2) Click West 424 +dictGetOrDefault test_01037.dict (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict (0,2) Click North 422 +dictGetOrDefault test_01037.dict (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict (1,0) Click East 421 dictGetOrDefault test_01037.dict (3,3) House 314159 dictGetOrDefault test_01037.dict (5,6) Click 42 dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 dictHas test_01037.dict (-100,-42) 0 +dictHas test_01037.dict (-1,0) 1 +dictHas test_01037.dict (-0.1,0) 1 +dictHas test_01037.dict (0,-2) 1 +dictHas test_01037.dict (0,-1.1) 1 +dictHas test_01037.dict (0,-1) 1 dictHas test_01037.dict (0,0) 1 +dictHas test_01037.dict (0,1) 1 +dictHas test_01037.dict (0,1.1) 1 +dictHas test_01037.dict (0,2) 1 +dictHas test_01037.dict (0.1,0) 1 dictHas test_01037.dict (0.99,2.99) 1 +dictHas test_01037.dict (1,0) 1 +dictHas test_01037.dict (1,1) 1 dictHas test_01037.dict (1,3) 1 dictHas test_01037.dict (3,3) 1 dictHas test_01037.dict (5,1) 1 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql index c99f76100f5..7b247737f80 100644 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql @@ -10,6 +10,10 @@ DROP TABLE IF EXISTS test_01037.polygons; CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], 
[-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +INSERT INTO test_01037.polygons VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); +INSERT INTO test_01037.polygons VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); +INSERT INTO test_01037.polygons VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); +INSERT INTO test_01037.polygons VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); CREATE DICTIONARY test_01037.dict ( @@ -25,7 +29,19 @@ LAYOUT(POLYGON()); DROP TABLE IF EXISTS test_01037.points; CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.0, 0.0, 11, 'aa'), (3.0, 3.0, 22, 'bb'), (5.0, 6.0, 33, 'cc'), (-100.0, -42.0, 44, 'dd'), (7.01, 7.01, 55, 'ee'), (0.99, 2.99, 66, 'ee'); +INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); +INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee') +INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), @@ -37,7 +53,14 @@ select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), dictGetOrDefault(dict_name, 'value', key, def_i) from 
test_01037.points order by x, y; -INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''), (5.0, 1.0, 0, ''), (1.0, 3.0, 0, ''); +INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''); +INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 0.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, ''); + select 'dictHas', 'test_01037.dict' as dict_name, tuple(x, y) as key, dictHas(dict_name, key) from test_01037.points order by x, y; From 2ed7378b69b16a184b7b2f2d7a618de96f3d655a Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 15 Jan 2020 16:50:03 +0300 Subject: [PATCH 63/76] Fix style error --- dbms/src/Dictionaries/PolygonDictionary.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 87841441ae4..699b9eaf82b 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -227,7 +227,7 @@ private: size_t bytes_allocated = 0; size_t element_count = 0; mutable std::atomic query_count{0}; - + static void extractMultiPolygons(const ColumnPtr & column, std::vector & dest); /** Extracts a list of points from two columns representing their x and y coordinates. 
*/ From a4fefc7a4554831185137fc5006eb24955592c0a Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 15 Jan 2020 17:04:19 +0300 Subject: [PATCH 64/76] Add some comments --- dbms/src/Dictionaries/PolygonDictionary.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 699b9eaf82b..0cdf8f98151 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -155,8 +155,10 @@ public: void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; protected: - /** A simple two-dimensional point in Euclidean coordinates. */ - using Point = bg::model::point; + /** The number of dimensions used. Change with great caution, some extra work will be required. */ + static constexpr size_t DIM = 2; + /** A point in Euclidean coordinates. */ + using Point = bg::model::point; /** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */ using Polygon = bg::model::polygon; /** A multi_polygon in boost is a collection of polygons. */ @@ -228,6 +230,15 @@ private: size_t element_count = 0; mutable std::atomic query_count{0}; + /** Extracts a list of multi-polygons from a column of 4-dimensional arrays of Float64 values. The results are + * written to dest. + * The structure is as follows: + * - A multi-polygon is represented by a nonempty array of polygons. + * - A polygon is represented by a nonempty array of rings. The first element represents the outer ring. Zero + * or more following rings are cut out from the polygon. + * - A ring is represented by a nonempty array of points. + * - A point is represented by an array of coordinates. + */ static void extractMultiPolygons(const ColumnPtr & column, std::vector & dest); /** Extracts a list of points from two columns representing their x and y coordinates. 
*/ @@ -243,14 +254,12 @@ private: /** Converts an array of polygons (see above) to a multi-polygon. */ static MultiPolygon fieldToMultiPolygon(const Field & field); - - /** The number of dimensions used. Change with great caution. */ - static constexpr size_t DIM = 2; }; /** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction. * Iterates over all stored polygons for each query, checking each of them in linear time. - * Retrieves the first polygon in the dictionary containing a given point. + * Retrieves the polygon with the smallest area containing the given point. If there is more than one any such polygon + * may be returned. */ class SimplePolygonDictionary : public IPolygonDictionary { From 1d68725814a5621a97dbc55e9a6472c570167d30 Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Wed, 15 Jan 2020 18:47:41 +0300 Subject: [PATCH 65/76] Delete old inefficient code, add some more checks upon dictionary creation --- dbms/src/Dictionaries/PolygonDictionary.cpp | 76 +++------------------ 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 47f048259c1..e665210b06d 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -522,69 +522,6 @@ void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vect bg::correct(multi_polygon); } -IPolygonDictionary::Point IPolygonDictionary::fieldToPoint(const Field &field) -{ - if (field.getType() == Field::Types::Array) - { - auto coordinate_array = field.get(); - if (coordinate_array.size() != DIM) - throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::BAD_ARGUMENTS}; - Float64 values[DIM]; - for (size_t i = 0; i < DIM; ++i) - { - if (coordinate_array[i].getType() != Field::Types::Float64) - throw Exception{"Coordinates should be Float64", ErrorCodes::TYPE_MISMATCH}; - 
values[i] = coordinate_array[i].get(); - } - return {values[0], values[1]}; - } - else - throw Exception{"Point is not represented by an array", ErrorCodes::TYPE_MISMATCH}; -} - -IPolygonDictionary::Polygon IPolygonDictionary::fieldToPolygon(const Field & field) -{ - Polygon result; - if (field.getType() == Field::Types::Array) - { - const auto & ring_array = field.get(); - if (ring_array.empty()) - throw Exception{"Empty polygons are not allowed", ErrorCodes::BAD_ARGUMENTS}; - result.inners().resize(ring_array.size() - 1); - if (ring_array[0].getType() != Field::Types::Array) - throw Exception{"Outer polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; - for (const auto & point : ring_array[0].get()) - bg::append(result.outer(), fieldToPoint(point)); - for (size_t i = 0; i < result.inners().size(); ++i) - { - if (ring_array[i + 1].getType() != Field::Types::Array) - throw Exception{"Inner polygon ring is not represented by an array", ErrorCodes::TYPE_MISMATCH}; - for (const auto & point : ring_array[i + 1].get()) - bg::append(result.inners()[i], fieldToPoint(point)); - } - } - else - throw Exception{"Polygon is not represented by an array", ErrorCodes::TYPE_MISMATCH}; - bg::correct(result); - return result; -} - -// TODO: Do this more efficiently by casting to the corresponding Column and avoiding Fields. 
-IPolygonDictionary::MultiPolygon IPolygonDictionary::fieldToMultiPolygon(const Field &field) -{ - MultiPolygon result; - if (field.getType() == Field::Types::Array) - { - const auto& polygon_array = field.get(); - result.reserve(polygon_array.size()); - for (const auto & polygon : polygon_array) - result.push_back(fieldToPolygon(polygon)); - } - else - throw Exception{"MultiPolygon is not represented by an array", ErrorCodes::TYPE_MISMATCH}; - return result; -} - SimplePolygonDictionary::SimplePolygonDictionary( const std::string & database_, const std::string & name_, @@ -636,11 +573,20 @@ void registerDictionaryPolygon(DictionaryFactory & factory) const String database = config.getString(config_prefix + ".database", ""); const String name = config.getString(config_prefix + ".name"); - // TODO: Check that there is only one key and it is of the correct type. + if (!dict_struct.key) + throw Exception{"'key' is required for dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; + if (dict_struct.key->size() != 1) + throw Exception{"The 'key' should consist of a single attribute for dictionary of layout 'polygon'", + ErrorCodes::BAD_ARGUMENTS}; + // TODO: Once arrays are fully supported this should be changed to a more reasonable check. 
+ if ((*dict_struct.key)[0].type->getName() != "Array(Array(Array(Array(Float64))))") + throw Exception{"The 'key' should be a 4-dimensional array of Float64 for dictionary of layout 'polygon'", + ErrorCodes::BAD_ARGUMENTS}; + if (dict_struct.range_min || dict_struct.range_max) throw Exception{name + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", + "for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; From 2590eaa782fcdab7d0b1db981a9efb6eb903870f Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 15 Jan 2020 18:54:45 +0300 Subject: [PATCH 66/76] Improved some error messages --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index e665210b06d..c0fbb371923 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -574,13 +574,13 @@ void registerDictionaryPolygon(DictionaryFactory & factory) const String name = config.getString(config_prefix + ".name"); if (!dict_struct.key) - throw Exception{"'key' is required for dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"'key' is required for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; if (dict_struct.key->size() != 1) - throw Exception{"The 'key' should consist of a single attribute for dictionary of layout 'polygon'", + throw Exception{"The 'key' should consist of a single attribute for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; // TODO: Once arrays are fully supported this should be changed to a more reasonable check. 
if ((*dict_struct.key)[0].type->getName() != "Array(Array(Array(Array(Float64))))") - throw Exception{"The 'key' should be a 4-dimensional array of Float64 for dictionary of layout 'polygon'", + throw Exception{"The 'key' attribute should be a 4-dimensional array of Float64s for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; if (dict_struct.range_min || dict_struct.range_max) From 56b696e899181090e2330ab70372fbd82ad2b71a Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 15 Jan 2020 19:00:32 +0300 Subject: [PATCH 67/76] Remove forgotten unused code --- dbms/src/Dictionaries/PolygonDictionary.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index c0fbb371923..aca7aa19699 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -200,16 +200,7 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) attributes[i] = column.column; } polygons.reserve(polygons.size() + rows); - const auto & key = block.safeGetByPosition(0).column; - - /*for (const auto row : ext::range(0, rows)) - { - const auto & field = (*key)[row]; - // TODO: Get data more efficiently using - polygons.push_back(fieldToMultiPolygon(field)); - }*/ - extractMultiPolygons(key, polygons); } From e30956d05a27493876f53d69a05e4b6f5a996bc0 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 27 Jan 2020 20:08:29 +0300 Subject: [PATCH 68/76] Rewrite some code and try new input methods 1.0 --- dbms/src/Dictionaries/PolygonDictionary.cpp | 279 +++++++++++++++----- dbms/src/Dictionaries/PolygonDictionary.h | 39 +-- 2 files changed, 240 insertions(+), 78 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index aca7aa19699..0c899d01323 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -1,5 +1,6 @@ #include 
#include +#include #include "PolygonDictionary.h" #include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" @@ -20,13 +21,17 @@ IPolygonDictionary::IPolygonDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_) + const DictionaryLifetime dict_lifetime_, + InputType input_type_, + PointType point_type_) : database(database_) , name(name_) , full_name{database_.empty() ? name_ : (database_ + "." + name_)} , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) + , input_type(input_type_) + , point_type(point_type_) { createAttributes(); loadData(); @@ -199,9 +204,11 @@ void IPolygonDictionary::blockToAttributes(const DB::Block &block) else attributes[i] = column.column; } + /** Multi-polygons could cause bigger sizes, but this is better than nothing. */ polygons.reserve(polygons.size() + rows); + ids.reserve(ids.size() + rows); const auto & key = block.safeGetByPosition(0).column; - extractMultiPolygons(key, polygons); + extractPolygons(key); } void IPolygonDictionary::loadData() @@ -211,6 +218,9 @@ void IPolygonDictionary::loadData() while (const auto block = stream->read()) blockToAttributes(block); stream->readSuffix(); + + for (auto & polygon : polygons) + bg::correct(polygon); } void IPolygonDictionary::calculateBytesAllocated() @@ -222,8 +232,8 @@ void IPolygonDictionary::calculateBytesAllocated() std::vector IPolygonDictionary::extractPoints(const Columns &key_columns) { - if (key_columns.size() != DIM) - throw Exception{"Expected " + std::to_string(DIM) + " columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; + if (key_columns.size() != 2) + throw Exception{"Expected two columns of coordinates", ErrorCodes::BAD_ARGUMENTS}; const auto column_x = typeid_cast*>(key_columns[0].get()); const auto column_y = typeid_cast*>(key_columns[1].get()); if (!column_x || !column_y) @@ -241,7 +251,6 @@ void 
IPolygonDictionary::has(const Columns &key_columns, const DataTypes &, Padd size_t row = 0; for (const auto & pt : extractPoints(key_columns)) { - // TODO: Check whether this will be optimized by the compiler. size_t trash = 0; out[row] = find(pt, trash); ++row; @@ -439,78 +448,197 @@ inline void makeDifferences(IColumn::Offsets & values) values[i] -= values[i - 1]; } +struct Offset +{ + Offset() = default; + IColumn::Offsets ring_sizes, polygon_sizes, multi_polygon_sizes; + IColumn::Offset current_ring = 0, current_polygon = 0, current_multi_polygon = 0; +}; + +struct Data +{ + std::vector & dest; + std::vector & ids; + + void addPolygon(bool new_multi_polygon = false) { + dest.emplace_back(); + ids.push_back((ids.size() ? ids.back() + new_multi_polygon : 0)); + } +}; + +void addNewMultiPolygon(Data &, Offset & offset) +{ + ++offset.current_multi_polygon; + if (offset.current_multi_polygon == offset.multi_polygon_sizes.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; } -void IPolygonDictionary::extractMultiPolygons(const ColumnPtr &column, std::vector &dest) +void addNewPolygon(Data & data, Offset & offset) +{ + ++offset.current_polygon; + if (offset.current_polygon == offset.polygon_sizes.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + + if (--offset.multi_polygon_sizes[offset.current_multi_polygon] == 0) + { + addNewMultiPolygon(data, offset); + data.addPolygon(true); + } + else + data.addPolygon(); +} + +void addNewRing(Data & data, Offset & offset) +{ + ++offset.current_ring; + if (offset.current_ring == offset.ring_sizes.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + + if (--offset.polygon_sizes[offset.current_polygon] == 0) + addNewPolygon(data, offset); + else + { + /** An outer ring is added automatically with a new polygon, thus we need the else statement here. 
+ * This also implies that if we are at this point we have to add an inner ring. + */ + auto & last_polygon = data.dest.back(); + last_polygon.inners().emplace_back(); + } +} + +void addNewPointFromMultiPolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset) +{ + if (offset.ring_sizes[offset.current_ring] == 0) + addNewRing(data, offset); + + auto & last_polygon = data.dest.back(); + auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back()); + last_ring.push_back(pt); + + --offset.ring_sizes[offset.current_ring]; +} + +void addNewPointFromSimplePolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset) +{ + if (offset.polygon_sizes[offset.current_polygon] == 0) + { + ++offset.current_polygon; + if (offset.current_polygon == offset.polygon_sizes.size()) + throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; + + } + auto & last_polygon = data.dest.back(); + last_polygon.outer().push_back(pt); + --offset.polygon_sizes[offset.current_polygon]; +} + +const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset) { - IColumn::Offsets polygons, rings, points; const auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; + offset.multi_polygon_sizes.assign(ptr_multi_polygons->getOffsets()); + makeDifferences(offset.multi_polygon_sizes); const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; - polygons.assign(ptr_multi_polygons->getOffsets()); + offset.polygon_sizes.assign(ptr_polygons->getOffsets()); + makeDifferences(offset.polygon_sizes); const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) throw Exception{"Expected a column containing arrays of points when reading 
rings", ErrorCodes::TYPE_MISMATCH}; - rings.assign(ptr_polygons->getOffsets()); + offset.ring_sizes.assign(ptr_rings->getOffsets()); + makeDifferences(offset.ring_sizes); - const auto ptr_points = typeid_cast(&ptr_rings->getData()); - if (!ptr_points) - throw Exception{"Expected a column containing arrays of Float64s when reading points", ErrorCodes::TYPE_MISMATCH}; - points.assign(ptr_rings->getOffsets()); + return ptr_rings->getDataPtr().get(); +} +const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset) +{ + const auto ptr_polygons = typeid_cast(column.get()); + if (!ptr_polygons) + throw Exception{"Expected a column containing arrays of points", ErrorCodes::TYPE_MISMATCH}; + offset.polygon_sizes.assign(ptr_polygons->getOffsets()); + return ptr_polygons->getDataPtr().get(); +} + +void getPointsReprByArrays(const IColumn * column, std::vector & dest) +{ + const auto ptr_points = typeid_cast(column); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); if (!ptr_coord) - throw Exception{"Expected a column containing Float64s when reading coordinates", ErrorCodes::TYPE_MISMATCH}; - const auto & coordinates = ptr_points->getOffsets(); - makeDifferences(polygons); - makeDifferences(rings); - makeDifferences(points); - IColumn::Offset point_offset = 0, ring_offset = 0, polygon_offset = 0; - dest.emplace_back(); - dest.back().emplace_back(); - for (size_t i = 0; i < coordinates.size(); ++i) + throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH}; + const auto & offsets = ptr_points->getOffsets(); + for (size_t i = 0; i < offsets.size(); ++i) { + if (offsets[i] - (i == 0 ? 0 : offsets[i - 1]) != 2) + throw Exception{"All points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS}; + dest.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); + } +} - if (coordinates[i] - (i == 0 ? 
0 : coordinates[i - 1]) != DIM) - throw Exception{"All points should be " + std::to_string(DIM) + "-dimensional", ErrorCodes::BAD_ARGUMENTS}; - if (points[point_offset] == 0) - { - ++point_offset; - --rings[ring_offset]; - if (rings[ring_offset] == 0) - { - ++ring_offset; - if (ring_offset == rings.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - --polygons[polygon_offset]; - if (polygons[polygon_offset] == 0) - { - dest.emplace_back(); - ++polygon_offset; - if (polygon_offset == polygons.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - } - else - dest.back().emplace_back(); - } - else - if (!dest.back().back().outer().empty()) - dest.back().back().inners().emplace_back(); - } - if (point_offset == points.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - --points[point_offset]; - auto & ring = (dest.back().back().inners().empty() ? dest.back().back().outer() : dest.back().back().inners().back()); - ring.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); +void getPointReprByTuples(const IColumn * column, std::vector & dest) +{ + const auto ptr_points = typeid_cast(column); + if (!ptr_points) + throw Exception{"Expected a column of tuples representing points", ErrorCodes::TYPE_MISMATCH}; + if (ptr_points->tupleSize() != 2) + throw Exception{"Points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS}; + const auto column_x = typeid_cast*>(&ptr_points->getColumn(0)); + const auto column_y = typeid_cast*>(&ptr_points->getColumn(1)); + if (!column_x || !column_y) + throw Exception{"Expected coordinates to be of type Float64", ErrorCodes::TYPE_MISMATCH}; + for (size_t i = 0; i < column_x->size(); ++i) + { + dest.emplace_back(column_x->getElement(i), column_y->getElement(i)); + } +} + +} + +void IPolygonDictionary::extractPolygons(const ColumnPtr &column) +{ + Data data = {polygons, ids}; + Offset offset; + + const IColumn * 
points_collection; + switch (input_type) + { + case InputType::MultiPolygon: + points_collection = unrollMultiPolygons(column, offset); + break; + case InputType::SimplePolygon: + points_collection = unrollSimplePolygons(column, offset); + break; } - for (auto & multi_polygon : dest) - bg::correct(multi_polygon); + /** Adding the first empty polygon */ + data.addPolygon(true); + + std::vector points; + switch (point_type) + { + case PointType::Array: + getPointsReprByArrays(points_collection, points); + break; + case PointType::Tuple: + getPointReprByTuples(points_collection, points); + break; + } + for (auto & point : points) + { + switch (input_type) + { + case InputType::MultiPolygon: + addNewPointFromMultiPolygon(point, data, offset); + break; + case InputType::SimplePolygon: + addNewPointFromSimplePolygon(point, data, offset); + break; + } + } } SimplePolygonDictionary::SimplePolygonDictionary( @@ -518,8 +646,10 @@ SimplePolygonDictionary::SimplePolygonDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_) - : IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_) + const DictionaryLifetime dict_lifetime_, + InputType input_type_, + PointType point_type_) + : IPolygonDictionary(database_, name_, dict_struct_, std::move(source_ptr_), dict_lifetime_, input_type_, point_type_) { } @@ -530,7 +660,9 @@ std::shared_ptr SimplePolygonDictionary::clone() const this->name, this->dict_struct, this->source_ptr->clone(), - this->dict_lifetime); + this->dict_lifetime, + this->input_type, + this->point_type); } bool SimplePolygonDictionary::find(const Point &point, size_t & id) const @@ -545,7 +677,7 @@ bool SimplePolygonDictionary::find(const Point &point, size_t & id) const if (!found || new_area < area) { found = true; - id = i; + id = ids[i]; area = new_area; } } @@ -570,8 +702,31 @@ void registerDictionaryPolygon(DictionaryFactory & 
factory) throw Exception{"The 'key' should consist of a single attribute for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; // TODO: Once arrays are fully supported this should be changed to a more reasonable check. - if ((*dict_struct.key)[0].type->getName() != "Array(Array(Array(Array(Float64))))") - throw Exception{"The 'key' attribute should be a 4-dimensional array of Float64s for a dictionary of layout 'polygon'", + const auto type_name = (*dict_struct.key)[0].type->getName(); + IPolygonDictionary::InputType input_type; + IPolygonDictionary::PointType point_type; + if (type_name == "Array(Array(Array(Array(Float64))))") + { + input_type = IPolygonDictionary::InputType::MultiPolygon; + point_type = IPolygonDictionary::PointType::Array; + } + else if (type_name == "Array(Array(Array(Tuple(Float64, Float64))))") + { + input_type = IPolygonDictionary::InputType::MultiPolygon; + point_type = IPolygonDictionary::PointType::Tuple; + } + else if (type_name == "Array(Array(Float64))") + { + input_type = IPolygonDictionary::InputType::SimplePolygon; + point_type = IPolygonDictionary::PointType::Array; + } + else if (type_name == "Array(Tuple(Float64, Float64))") + { + input_type = IPolygonDictionary::InputType::SimplePolygon; + point_type = IPolygonDictionary::PointType::Tuple; + } + else + throw Exception{"The key type is not one of the allowed types for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; if (dict_struct.range_min || dict_struct.range_max) @@ -581,7 +736,7 @@ void registerDictionaryPolygon(DictionaryFactory & factory) ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - return std::make_unique(database, name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(database, name, dict_struct, std::move(source_ptr), dict_lifetime, input_type, point_type); }; factory.registerLayout("polygon", create_layout, true); } diff --git 
a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 0cdf8f98151..54fee25be8f 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -26,12 +26,24 @@ namespace bg = boost::geometry; class IPolygonDictionary : public IDictionaryBase { public: + enum class InputType + { + MultiPolygon, + SimplePolygon + }; + enum class PointType + { + Array, + Tuple, + }; IPolygonDictionary( const std::string & database_, const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - DictionaryLifetime dict_lifetime_); + DictionaryLifetime dict_lifetime_, + InputType input_type_, + PointType point_type_); const std::string & getDatabase() const override; const std::string & getName() const override; @@ -154,7 +166,6 @@ public: // TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override. void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; -protected: /** The number of dimensions used. Change with great caution, some extra work will be required. */ static constexpr size_t DIM = 2; /** A point in Euclidean coordinates. */ @@ -164,13 +175,15 @@ protected: /** A multi_polygon in boost is a collection of polygons. */ using MultiPolygon = bg::model::multi_polygon; +protected: /** Returns true if the given point can be found in the polygon dictionary. * If true id is set to the index of a polygon containing the given point. * Overridden in different implementations of this interface. 
*/ virtual bool find(const Point & point, size_t & id) const = 0; - std::vector polygons; + std::vector polygons; + std::vector ids; const std::string database; const std::string name; @@ -179,6 +192,9 @@ protected: const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const InputType input_type; + const PointType point_type; + private: /** Helper functions for loading the data from the configuration. * The polygons serving as keys are extracted into boost types. @@ -239,21 +255,10 @@ private: * - A ring is represented by a nonempty array of points. * - A point is represented by an array of coordinates. */ - static void extractMultiPolygons(const ColumnPtr & column, std::vector & dest); + void extractPolygons(const ColumnPtr & column); /** Extracts a list of points from two columns representing their x and y coordinates. */ static std::vector extractPoints(const Columns &key_columns); - - /** Converts an array containing two Float64s to a point. */ - static Point fieldToPoint(const Field & field); - - /** Converts an array of arrays of points to a polygon. The first array represents the outer ring and zero or more - * following arrays represent the rings that are excluded from the polygon. - */ - static Polygon fieldToPolygon(const Field & field); - - /** Converts an array of polygons (see above) to a multi-polygon. */ - static MultiPolygon fieldToMultiPolygon(const Field & field); }; /** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction. 
@@ -269,7 +274,9 @@ public: const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - DictionaryLifetime dict_lifetime_); + DictionaryLifetime dict_lifetime_, + InputType input_type_, + PointType point_type_); std::shared_ptr clone() const override; From 33339e6347b8426aa60b186ec423626d488869de Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Mon, 27 Jan 2020 23:28:20 +0300 Subject: [PATCH 69/76] tmp --- dbms/src/Dictionaries/PolygonDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 0c899d01323..09a84074657 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -603,7 +603,7 @@ void IPolygonDictionary::extractPolygons(const ColumnPtr &column) Data data = {polygons, ids}; Offset offset; - const IColumn * points_collection; + const IColumn * points_collection = nullptr; switch (input_type) { case InputType::MultiPolygon: From 5f2ed57c49a711681ec0b3313c03a4c98d4ccdfc Mon Sep 17 00:00:00 2001 From: Andrei Chulkov Date: Tue, 28 Jan 2020 16:25:55 +0300 Subject: [PATCH 70/76] Add some comments & new tests for different input types --- dbms/src/Dictionaries/PolygonDictionary.cpp | 5 +- dbms/src/Dictionaries/PolygonDictionary.h | 33 ++-- .../0_stateless/01037_polygon_dict.reference | 59 -------- .../0_stateless/01037_polygon_dict.sql | 70 --------- ...1037_polygon_dict_multi_polygons.reference | 118 +++++++++++++++ .../01037_polygon_dict_multi_polygons.sql | 107 +++++++++++++ ...037_polygon_dict_simple_polygons.reference | 142 ++++++++++++++++++ .../01037_polygon_dict_simple_polygons.sql | 108 +++++++++++++ 8 files changed, 497 insertions(+), 145 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict.reference delete mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict.sql create mode 100644 
dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.reference create mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.sql create mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.reference create mode 100644 dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.sql diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 09a84074657..e0ddb448f45 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -425,6 +425,7 @@ void IPolygonDictionary::getItemsImpl( { size_t id = 0; const auto found = find(points[i], id); + id = ids[id]; if (!found) { set_value(i, static_cast(get_default(i))); @@ -462,7 +463,7 @@ struct Data void addPolygon(bool new_multi_polygon = false) { dest.emplace_back(); - ids.push_back((ids.size() ? ids.back() + new_multi_polygon : 0)); + ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon)); } }; @@ -677,7 +678,7 @@ bool SimplePolygonDictionary::find(const Point &point, size_t & id) const if (!found || new_area < area) { found = true; - id = ids[i]; + id = i; area = new_area; } } diff --git a/dbms/src/Dictionaries/PolygonDictionary.h b/dbms/src/Dictionaries/PolygonDictionary.h index 54fee25be8f..fc85e339231 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.h +++ b/dbms/src/Dictionaries/PolygonDictionary.h @@ -26,11 +26,23 @@ namespace bg = boost::geometry; class IPolygonDictionary : public IDictionaryBase { public: + /** Controls the different types of polygons allowed as input. + * The structure of a multi-polygon is as follows: + * - A multi-polygon is represented by a nonempty array of polygons. + * - A polygon is represented by a nonempty array of rings. The first element represents the outer ring. Zero + * or more following rings are cut out from the polygon. + * - A ring is represented by a nonempty array of points. 
+ * A point is represented by its coordinates stored in a corresponding structure (see below). + * A simple polygon is represented by a one-dimensional array of points, stored in the corresponding structure. + */ enum class InputType { MultiPolygon, SimplePolygon }; + /** Controls the different types allowed for providing the coordinates of points. + * Right now a point can be represented by either an array or a tuple of two Float64 values. + */ enum class PointType { Array, @@ -166,14 +178,10 @@ public: // TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override. void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; - /** The number of dimensions used. Change with great caution, some extra work will be required. */ - static constexpr size_t DIM = 2; - /** A point in Euclidean coordinates. */ - using Point = bg::model::point; + /** A two-dimensional point in Euclidean coordinates. */ + using Point = bg::model::point; /** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */ using Polygon = bg::model::polygon; - /** A multi_polygon in boost is a collection of polygons. */ - using MultiPolygon = bg::model::multi_polygon; protected: /** Returns true if the given point can be found in the polygon dictionary. @@ -183,6 +191,9 @@ protected: virtual bool find(const Point & point, size_t & id) const = 0; std::vector polygons; + /** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon + * corresponding to the row in which any other attributes for this entry are located. + */ std::vector ids; const std::string database; @@ -246,14 +257,8 @@ private: size_t element_count = 0; mutable std::atomic query_count{0}; - /** Extracts a list of multi-polygons from a column of 4-dimensional arrays of Float64 values. The results are - * written to dest.
- * The structure is as follows: - * - A multi-polygon is represented by a nonempty array of polygons. - * - A polygon is represented by a nonempty array of rings. The first element represents the outer ring. Zero - * or more following rings are cut out from the polygon. - * - A ring is represented by a nonempty array of points. - * - A point is represented by an array of coordinates. + /** Extracts a list of polygons from a column according to input_type and point_type. + * The polygons are appended to the dictionary with the corresponding ids. */ void extractPolygons(const ColumnPtr & column); diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict.reference deleted file mode 100644 index b296fa1cc38..00000000000 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.reference +++ /dev/null @@ -1,59 +0,0 @@ -dictGet test_01037.dict (-100,-42) qqq 101 -dictGet test_01037.dict (-1,0) Click South 423 -dictGet test_01037.dict (-0.1,0) Click South 423 -dictGet test_01037.dict (0,-2) Click West 424 -dictGet test_01037.dict (0,-1.1) Click West 424 -dictGet test_01037.dict (0,1.1) Click North 422 -dictGet test_01037.dict (0,2) Click North 422 -dictGet test_01037.dict (0.1,0) Click East 421 -dictGet test_01037.dict (0.99,2.99) Click North 422 -dictGet test_01037.dict (1,0) Click East 421 -dictGet test_01037.dict (3,3) House 314159 -dictGet test_01037.dict (5,6) Click 42 -dictGet test_01037.dict (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict (-100,-42) www 1234 -dictGetOrDefault test_01037.dict (-1,0) Click South 423 -dictGetOrDefault test_01037.dict (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict (0,-2) Click West 424 -dictGetOrDefault test_01037.dict (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict (0,2) Click North 422 -dictGetOrDefault test_01037.dict (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict 
(0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict (1,0) Click East 421 -dictGetOrDefault test_01037.dict (3,3) House 314159 -dictGetOrDefault test_01037.dict (5,6) Click 42 -dictGetOrDefault test_01037.dict (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict (-100,-42) dd 44 -dictGetOrDefault test_01037.dict (-1,0) Click South 423 -dictGetOrDefault test_01037.dict (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict (0,-2) Click West 424 -dictGetOrDefault test_01037.dict (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict (0,2) Click North 422 -dictGetOrDefault test_01037.dict (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict (1,0) Click East 421 -dictGetOrDefault test_01037.dict (3,3) House 314159 -dictGetOrDefault test_01037.dict (5,6) Click 42 -dictGetOrDefault test_01037.dict (7.01,7.01) ee 55 -dictHas test_01037.dict (-100,-42) 0 -dictHas test_01037.dict (-1,0) 1 -dictHas test_01037.dict (-0.1,0) 1 -dictHas test_01037.dict (0,-2) 1 -dictHas test_01037.dict (0,-1.1) 1 -dictHas test_01037.dict (0,-1) 1 -dictHas test_01037.dict (0,0) 1 -dictHas test_01037.dict (0,1) 1 -dictHas test_01037.dict (0,1.1) 1 -dictHas test_01037.dict (0,2) 1 -dictHas test_01037.dict (0.1,0) 1 -dictHas test_01037.dict (0.99,2.99) 1 -dictHas test_01037.dict (1,0) 1 -dictHas test_01037.dict (1,1) 1 -dictHas test_01037.dict (1,3) 1 -dictHas test_01037.dict (3,3) 1 -dictHas test_01037.dict (5,1) 1 -dictHas test_01037.dict (5,5) 1 -dictHas test_01037.dict (5,6) 1 -dictHas test_01037.dict (7.01,7.01) 0 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict.sql deleted file mode 100644 index 7b247737f80..00000000000 --- a/dbms/tests/queries/0_stateless/01037_polygon_dict.sql +++ /dev/null @@ -1,70 +0,0 @@ -SET send_logs_level = 'none'; - -DROP DATABASE IF EXISTS test_01037; - 
-CREATE DATABASE test_01037 Engine = Ordinary; - -DROP DICTIONARY IF EXISTS test_01037.dict; -DROP TABLE IF EXISTS test_01037.polygons; - -CREATE TABLE test_01037.polygons (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -INSERT INTO test_01037.polygons VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); -INSERT INTO test_01037.polygons VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); -INSERT INTO test_01037.polygons VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); -INSERT INTO test_01037.polygons VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); -INSERT INTO test_01037.polygons VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); - -CREATE DICTIONARY test_01037.dict -( - key Array(Array(Array(Array(Float64)))), - name String DEFAULT 'qqq', - value UInt64 DEFAULT 101 -) -PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons' PASSWORD '' DB 'test_01037')) -LIFETIME(MIN 1 MAX 10) -LAYOUT(POLYGON()); - -DROP TABLE IF EXISTS test_01037.points; - -CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); -INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); -INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); -INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); -INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); -INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); -INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); 
-INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee') -INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); -INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); -INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); -INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); -INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); - -select 'dictGet', 'test_01037.dict' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; -select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, - dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; -select 'dictGetOrDefault', 'test_01037.dict' as dict_name, tuple(x, y) as key, - dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - -INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''); -INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, ''); -INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, ''); -INSERT INTO test_01037.points VALUES (0.0, 0.0, 0, ''); -INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, ''); -INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, ''); -INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, ''); - -select 'dictHas', 'test_01037.dict' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; - -DROP DICTIONARY test_01037.dict; -DROP TABLE test_01037.polygons; -DROP TABLE test_01037.points; -DROP DATABASE test_01037; diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.reference new file mode 100644 index 00000000000..80af8a52400 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.reference @@ -0,0 +1,118 @@ 
+dictGet test_01037.dict_array (-100,-42) qqq 101 +dictGet test_01037.dict_array (-1,0) Click South 423 +dictGet test_01037.dict_array (-0.1,0) Click South 423 +dictGet test_01037.dict_array (0,-2) Click West 424 +dictGet test_01037.dict_array (0,-1.1) Click West 424 +dictGet test_01037.dict_array (0,1.1) Click North 422 +dictGet test_01037.dict_array (0,2) Click North 422 +dictGet test_01037.dict_array (0.1,0) Click East 421 +dictGet test_01037.dict_array (0.99,2.99) Click North 422 +dictGet test_01037.dict_array (1,0) Click East 421 +dictGet test_01037.dict_array (3,3) House 314159 +dictGet test_01037.dict_array (5,6) Click 42 +dictGet test_01037.dict_array (7.01,7.01) qqq 101 +dictGetOrDefault test_01037.dict_array (-100,-42) www 1234 +dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_array (0,2) Click North 422 +dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_array (1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (3,3) House 314159 +dictGetOrDefault test_01037.dict_array (5,6) Click 42 +dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234 +dictGetOrDefault test_01037.dict_array (-100,-42) dd 44 +dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_array (0,2) Click North 422 +dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 
+dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_array (1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (3,3) House 314159 +dictGetOrDefault test_01037.dict_array (5,6) Click 42 +dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55 +dictGet test_01037.dict_tuple (-100,-42) qqq 101 +dictGet test_01037.dict_tuple (-1,0) Click South 423 +dictGet test_01037.dict_tuple (-0.1,0) Click South 423 +dictGet test_01037.dict_tuple (0,-2) Click West 424 +dictGet test_01037.dict_tuple (0,-1.1) Click West 424 +dictGet test_01037.dict_tuple (0,1.1) Click North 422 +dictGet test_01037.dict_tuple (0,2) Click North 422 +dictGet test_01037.dict_tuple (0.1,0) Click East 421 +dictGet test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGet test_01037.dict_tuple (1,0) Click East 421 +dictGet test_01037.dict_tuple (3,3) House 314159 +dictGet test_01037.dict_tuple (5,6) Click 42 +dictGet test_01037.dict_tuple (7.01,7.01) qqq 101 +dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234 +dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 +dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 +dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234 +dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44 +dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault 
test_01037.dict_tuple (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 +dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 +dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55 +dictHas test_01037.dict_array (-100,-42) 0 +dictHas test_01037.dict_array (-1,0) 1 +dictHas test_01037.dict_array (-0.1,0) 1 +dictHas test_01037.dict_array (0,-2) 1 +dictHas test_01037.dict_array (0,-1.1) 1 +dictHas test_01037.dict_array (0,-1) 1 +dictHas test_01037.dict_array (0,0) 1 +dictHas test_01037.dict_array (0,1) 1 +dictHas test_01037.dict_array (0,1.1) 1 +dictHas test_01037.dict_array (0,2) 1 +dictHas test_01037.dict_array (0.1,0) 1 +dictHas test_01037.dict_array (0.99,2.99) 1 +dictHas test_01037.dict_array (1,0) 1 +dictHas test_01037.dict_array (1,1) 1 +dictHas test_01037.dict_array (1,3) 1 +dictHas test_01037.dict_array (3,3) 1 +dictHas test_01037.dict_array (5,1) 1 +dictHas test_01037.dict_array (5,5) 1 +dictHas test_01037.dict_array (5,6) 1 +dictHas test_01037.dict_array (7.01,7.01) 0 +dictHas test_01037.dict_tuple (-100,-42) 0 +dictHas test_01037.dict_tuple (-1,0) 1 +dictHas test_01037.dict_tuple (-0.1,0) 1 +dictHas test_01037.dict_tuple (0,-2) 1 +dictHas test_01037.dict_tuple (0,-1.1) 1 +dictHas test_01037.dict_tuple (0,-1) 1 +dictHas test_01037.dict_tuple (0,0) 1 +dictHas test_01037.dict_tuple (0,1) 1 +dictHas test_01037.dict_tuple (0,1.1) 1 +dictHas test_01037.dict_tuple (0,2) 1 +dictHas test_01037.dict_tuple (0.1,0) 1 +dictHas test_01037.dict_tuple (0.99,2.99) 1 +dictHas test_01037.dict_tuple (1,0) 1 +dictHas test_01037.dict_tuple (1,1) 1 +dictHas 
test_01037.dict_tuple (1,3) 1 +dictHas test_01037.dict_tuple (3,3) 1 +dictHas test_01037.dict_tuple (5,1) 1 +dictHas test_01037.dict_tuple (5,5) 1 +dictHas test_01037.dict_tuple (5,6) 1 +dictHas test_01037.dict_tuple (7.01,7.01) 0 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.sql new file mode 100644 index 00000000000..4c446c28b51 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict_multi_polygons.sql @@ -0,0 +1,107 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS test_01037; + +CREATE DATABASE test_01037 Engine = Ordinary; + +DROP DICTIONARY IF EXISTS test_01037.dict_array; +DROP TABLE IF EXISTS test_01037.polygons_array; + +CREATE TABLE test_01037.polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO test_01037.polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO test_01037.polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +INSERT INTO test_01037.polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); +INSERT INTO test_01037.polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); +INSERT INTO test_01037.polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); +INSERT INTO test_01037.polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); + +CREATE DICTIONARY test_01037.dict_array +( + key Array(Array(Array(Array(Float64)))), + name String DEFAULT 'qqq', + value UInt64 DEFAULT 101 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' 
TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(POLYGON()); + +DROP DICTIONARY IF EXISTS test_01037.dict_tuple; +DROP TABLE IF EXISTS test_01037.polygons_tuple; + +CREATE TABLE test_01037.polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO test_01037.polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); +INSERT INTO test_01037.polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); +INSERT INTO test_01037.polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); +INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); +INSERT INTO test_01037.polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); +INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); + +CREATE DICTIONARY test_01037.dict_tuple +( + key Array(Array(Array(Tuple(Float64, Float64)))), + name String DEFAULT 'qqq', + value UInt64 DEFAULT 101 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(POLYGON()); + +DROP TABLE IF EXISTS test_01037.points; + +CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 
'bb'); +INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee') +INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); + +select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'name', key), + dictGet(dict_name, 'value', key) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, 'www'), + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, def_s), + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; + +select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'name', key), + dictGet(dict_name, 'value', key) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, 'www'), + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, def_s), + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; + +INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''); +INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, ''); +INSERT INTO 
test_01037.points VALUES (0.0, 0.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, ''); + +select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from test_01037.points order by x, y; + +select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from test_01037.points order by x, y; + +DROP DICTIONARY test_01037.dict_array; +DROP DICTIONARY test_01037.dict_tuple; +DROP TABLE test_01037.polygons_array; +DROP TABLE test_01037.polygons_tuple; +DROP TABLE test_01037.points; +DROP DATABASE test_01037; diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.reference b/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.reference new file mode 100644 index 00000000000..bc40521f692 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.reference @@ -0,0 +1,142 @@ +dictGet test_01037.dict_array (-100,-42) qqq 101 +dictGet test_01037.dict_array (-1,0) Click South 423 +dictGet test_01037.dict_array (-0.1,0) Click South 423 +dictGet test_01037.dict_array (0,-2) Click West 424 +dictGet test_01037.dict_array (0,-1.1) Click West 424 +dictGet test_01037.dict_array (0,1.1) Click North 422 +dictGet test_01037.dict_array (0,2) Click North 422 +dictGet test_01037.dict_array (0.1,0) Click East 421 +dictGet test_01037.dict_array (0.99,2.99) Click North 422 +dictGet test_01037.dict_array (1,0) Click East 421 +dictGet test_01037.dict_array (2,4) House 523 +dictGet test_01037.dict_array (2,4.1) qqq 101 +dictGet test_01037.dict_array (3,3) House 523 +dictGet test_01037.dict_array (4,4) House 523 +dictGet test_01037.dict_array (5,6) qqq 101 +dictGet test_01037.dict_array (7.01,7.01) qqq 101 +dictGetOrDefault test_01037.dict_array (-100,-42) www 1234 +dictGetOrDefault test_01037.dict_array (-1,0) Click South 
423 +dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_array (0,2) Click North 422 +dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_array (1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (2,4) House 523 +dictGetOrDefault test_01037.dict_array (2,4.1) www 1234 +dictGetOrDefault test_01037.dict_array (3,3) House 523 +dictGetOrDefault test_01037.dict_array (4,4) House 523 +dictGetOrDefault test_01037.dict_array (5,6) www 1234 +dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234 +dictGetOrDefault test_01037.dict_array (-100,-42) dd 44 +dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_array (0,2) Click North 422 +dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_array (1,0) Click East 421 +dictGetOrDefault test_01037.dict_array (2,4) House 523 +dictGetOrDefault test_01037.dict_array (2,4.1) gac 803 +dictGetOrDefault test_01037.dict_array (3,3) House 523 +dictGetOrDefault test_01037.dict_array (4,4) House 523 +dictGetOrDefault test_01037.dict_array (5,6) cc 33 +dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55 +dictGet test_01037.dict_tuple (-100,-42) qqq 101 +dictGet test_01037.dict_tuple (-1,0) Click South 423 +dictGet test_01037.dict_tuple (-0.1,0) Click South 423 +dictGet 
test_01037.dict_tuple (0,-2) Click West 424 +dictGet test_01037.dict_tuple (0,-1.1) Click West 424 +dictGet test_01037.dict_tuple (0,1.1) Click North 422 +dictGet test_01037.dict_tuple (0,2) Click North 422 +dictGet test_01037.dict_tuple (0.1,0) Click East 421 +dictGet test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGet test_01037.dict_tuple (1,0) Click East 421 +dictGet test_01037.dict_tuple (2,4) House 523 +dictGet test_01037.dict_tuple (2,4.1) qqq 101 +dictGet test_01037.dict_tuple (3,3) House 523 +dictGet test_01037.dict_tuple (4,4) House 523 +dictGet test_01037.dict_tuple (5,6) qqq 101 +dictGet test_01037.dict_tuple (7.01,7.01) qqq 101 +dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234 +dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (2,4) House 523 +dictGetOrDefault test_01037.dict_tuple (2,4.1) www 1234 +dictGetOrDefault test_01037.dict_tuple (3,3) House 523 +dictGetOrDefault test_01037.dict_tuple (4,4) House 523 +dictGetOrDefault test_01037.dict_tuple (5,6) www 1234 +dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234 +dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44 +dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 
422 +dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 +dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 +dictGetOrDefault test_01037.dict_tuple (2,4) House 523 +dictGetOrDefault test_01037.dict_tuple (2,4.1) gac 803 +dictGetOrDefault test_01037.dict_tuple (3,3) House 523 +dictGetOrDefault test_01037.dict_tuple (4,4) House 523 +dictGetOrDefault test_01037.dict_tuple (5,6) cc 33 +dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55 +dictHas test_01037.dict_array (-100,-42) 0 +dictHas test_01037.dict_array (-1,0) 1 +dictHas test_01037.dict_array (-0.1,0) 1 +dictHas test_01037.dict_array (0,-2) 1 +dictHas test_01037.dict_array (0,-1.1) 1 +dictHas test_01037.dict_array (0,-1) 1 +dictHas test_01037.dict_array (0,0) 1 +dictHas test_01037.dict_array (0,1) 1 +dictHas test_01037.dict_array (0,1.1) 1 +dictHas test_01037.dict_array (0,2) 1 +dictHas test_01037.dict_array (0.1,0) 1 +dictHas test_01037.dict_array (0.99,2.99) 1 +dictHas test_01037.dict_array (1,0) 1 +dictHas test_01037.dict_array (1,1) 1 +dictHas test_01037.dict_array (1,3) 1 +dictHas test_01037.dict_array (2,4) 1 +dictHas test_01037.dict_array (2,4.1) 0 +dictHas test_01037.dict_array (3,3) 1 +dictHas test_01037.dict_array (4,4) 1 +dictHas test_01037.dict_array (5,1) 1 +dictHas test_01037.dict_array (5,5) 1 +dictHas test_01037.dict_array (5,6) 0 +dictHas test_01037.dict_array (7.01,7.01) 0 +dictHas test_01037.dict_tuple (-100,-42) 0 +dictHas test_01037.dict_tuple (-1,0) 1 +dictHas test_01037.dict_tuple (-0.1,0) 1 +dictHas test_01037.dict_tuple (0,-2) 1 +dictHas test_01037.dict_tuple (0,-1.1) 1 +dictHas test_01037.dict_tuple (0,-1) 1 +dictHas test_01037.dict_tuple (0,0) 1 +dictHas test_01037.dict_tuple (0,1) 1 +dictHas test_01037.dict_tuple (0,1.1) 1 +dictHas test_01037.dict_tuple (0,2) 1 +dictHas test_01037.dict_tuple (0.1,0) 1 +dictHas test_01037.dict_tuple 
(0.99,2.99) 1 +dictHas test_01037.dict_tuple (1,0) 1 +dictHas test_01037.dict_tuple (1,1) 1 +dictHas test_01037.dict_tuple (1,3) 1 +dictHas test_01037.dict_tuple (2,4) 1 +dictHas test_01037.dict_tuple (2,4.1) 0 +dictHas test_01037.dict_tuple (3,3) 1 +dictHas test_01037.dict_tuple (4,4) 1 +dictHas test_01037.dict_tuple (5,1) 1 +dictHas test_01037.dict_tuple (5,5) 1 +dictHas test_01037.dict_tuple (5,6) 0 +dictHas test_01037.dict_tuple (7.01,7.01) 0 diff --git a/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.sql b/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.sql new file mode 100644 index 00000000000..56e5865a5a1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_polygon_dict_simple_polygons.sql @@ -0,0 +1,108 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS test_01037; + +CREATE DATABASE test_01037 Engine = Ordinary; + +DROP DICTIONARY IF EXISTS test_01037.dict_array; +DROP TABLE IF EXISTS test_01037.polygons_array; + +CREATE TABLE test_01037.polygons_array (key Array(Array(Float64)), name String, value UInt64) ENGINE = Memory; +INSERT INTO test_01037.polygons_array VALUES ([[3, 1], [0, 1], [0, -1], [3, -1]], 'Click East', 421); +INSERT INTO test_01037.polygons_array VALUES ([[-1, 1], [1, 1], [1, 3], [-1, 3]], 'Click North', 422); +INSERT INTO test_01037.polygons_array VALUES ([[-3, 1], [-3, -1], [0, -1], [0, 1]], 'Click South', 423); +INSERT INTO test_01037.polygons_array VALUES ([[-1, -1], [1, -1], [1, -3], [-1, -3]], 'Click West', 424); +INSERT INTO test_01037.polygons_array VALUES ([[1, 1], [1, 3], [3, 5], [5, 5], [5, 1]], 'House', 523); + +CREATE DICTIONARY test_01037.dict_array +( + key Array(Array(Float64)), + name String DEFAULT 'qqq', + value UInt64 DEFAULT 101 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(POLYGON()); + +DROP DICTIONARY IF EXISTS test_01037.dict_tuple; +DROP TABLE IF 
EXISTS test_01037.polygons_tuple; + +CREATE TABLE test_01037.polygons_tuple (key Array(Tuple(Float64, Float64)), name String, value UInt64) ENGINE = Memory; +INSERT INTO test_01037.polygons_tuple VALUES ([(3.0, 1.0), (0.0, 1.0), (0.0, -1.0), (3.0, -1.0)], 'Click East', 421); +INSERT INTO test_01037.polygons_tuple VALUES ([(-1, 1), (1, 1), (1, 3), (-1, 3)], 'Click North', 422); +INSERT INTO test_01037.polygons_tuple VALUES ([(-3, 1), (-3, -1), (0, -1), (0, 1)], 'Click South', 423); +INSERT INTO test_01037.polygons_tuple VALUES ([(-1, -1), (1, -1), (1, -3), (-1, -3)], 'Click West', 424); +INSERT INTO test_01037.polygons_tuple VALUES ([(1, 1), (1, 3), (3, 5), (5, 5), (5, 1)], 'House', 523); + +CREATE DICTIONARY test_01037.dict_tuple +( + key Array(Tuple(Float64, Float64)), + name String DEFAULT 'qqq', + value UInt64 DEFAULT 101 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(POLYGON()); + +DROP TABLE IF EXISTS test_01037.points; + +CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); +INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee'); +INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); +INSERT INTO test_01037.points 
VALUES (2.0, 4.0, 801, 'gaa'); +INSERT INTO test_01037.points VALUES (4.0, 4.0, 802, 'gab'); +INSERT INTO test_01037.points VALUES (2.0, 4.1, 803, 'gac'); + +select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'name', key), + dictGet(dict_name, 'value', key) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, 'www'), + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, def_s), + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; + +select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'name', key), + dictGet(dict_name, 'value', key) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, 'www'), + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; +select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'name', key, def_s), + dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; + +INSERT INTO test_01037.points VALUES (5.0, 5.0, 0, ''); +INSERT INTO test_01037.points VALUES (5.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 3.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 0.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, 1.0, 0, ''); +INSERT INTO test_01037.points VALUES (0.0, -1.0, 0, ''); +INSERT INTO test_01037.points VALUES (1.0, 1.0, 0, ''); + +select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from test_01037.points order by x, y; + 
+select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from test_01037.points order by x, y; + +DROP DICTIONARY test_01037.dict_array; +DROP DICTIONARY test_01037.dict_tuple; +DROP TABLE test_01037.polygons_array; +DROP TABLE test_01037.polygons_tuple; +DROP TABLE test_01037.points; +DROP DATABASE test_01037; From 34eee12d478740fe7ede94cd40ea0477d56c6485 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Tue, 28 Jan 2020 17:21:02 +0300 Subject: [PATCH 71/76] Fix style check --- dbms/src/Dictionaries/PolygonDictionary.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index e0ddb448f45..510678fa511 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -461,7 +461,8 @@ struct Data std::vector & dest; std::vector & ids; - void addPolygon(bool new_multi_polygon = false) { + void addPolygon(bool new_multi_polygon = false) + { dest.emplace_back(); ids.push_back((ids.empty() ? 
0 : ids.back() + new_multi_polygon)); } From 07699df1e8a24ae1b73254a5236a88054bfe6947 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 30 Jan 2020 17:59:16 +0300 Subject: [PATCH 72/76] Address comments --- dbms/src/Dictionaries/PolygonDictionary.cpp | 29 ++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index 510678fa511..ef2881f883f 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "PolygonDictionary.h" #include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" @@ -703,38 +705,47 @@ void registerDictionaryPolygon(DictionaryFactory & factory) if (dict_struct.key->size() != 1) throw Exception{"The 'key' should consist of a single attribute for a dictionary of layout 'polygon'", ErrorCodes::BAD_ARGUMENTS}; - // TODO: Once arrays are fully supported this should be changed to a more reasonable check. 
- const auto type_name = (*dict_struct.key)[0].type->getName(); IPolygonDictionary::InputType input_type; IPolygonDictionary::PointType point_type; - if (type_name == "Array(Array(Array(Array(Float64))))") + const auto key_type = (*dict_struct.key)[0].type; + const auto f64 = std::make_shared(); + const auto multi_polygon_array = DataTypeArray(DataTypeArray(DataTypeArray(DataTypeArray(f64)))); + const auto multi_polygon_tuple = DataTypeArray(DataTypeArray(DataTypeArray(std::make_shared(std::vector{f64, f64})))); + const auto simple_polygon_array = DataTypeArray(DataTypeArray(f64)); + const auto simple_polygon_tuple = DataTypeArray(std::make_shared(std::vector{f64, f64})); + if (key_type->equals(multi_polygon_array)) { input_type = IPolygonDictionary::InputType::MultiPolygon; point_type = IPolygonDictionary::PointType::Array; } - else if (type_name == "Array(Array(Array(Tuple(Float64, Float64))))") + else if (key_type->equals(multi_polygon_tuple)) { input_type = IPolygonDictionary::InputType::MultiPolygon; point_type = IPolygonDictionary::PointType::Tuple; } - else if (type_name == "Array(Array(Float64))") + else if (key_type->equals(simple_polygon_array)) { input_type = IPolygonDictionary::InputType::SimplePolygon; point_type = IPolygonDictionary::PointType::Array; } - else if (type_name == "Array(Tuple(Float64, Float64))") + else if (key_type->equals(simple_polygon_tuple)) { input_type = IPolygonDictionary::InputType::SimplePolygon; point_type = IPolygonDictionary::PointType::Tuple; } else - throw Exception{"The key type is not one of the allowed types for a dictionary of layout 'polygon'", + throw Exception{"The key type " + key_type->getName() + + " is not one of the following allowed types for a dictionary of layout 'polygon': " + + multi_polygon_array.getName() + " " + + multi_polygon_tuple.getName() + " " + + simple_polygon_array.getName() + " " + + simple_polygon_tuple.getName() + " ", ErrorCodes::BAD_ARGUMENTS}; if (dict_struct.range_min || 
dict_struct.range_max) throw Exception{name - + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'polygon'", + + ": elements range_min and range_max should be defined only " + "for a dictionary of layout 'range_hashed'", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; From 68cf5104e17d060229b32b738e1f07cf277b9478 Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 30 Jan 2020 18:17:20 +0300 Subject: [PATCH 73/76] Fix types --- dbms/src/Dictionaries/PolygonDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index ef2881f883f..d820b70f25a 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -709,9 +709,9 @@ void registerDictionaryPolygon(DictionaryFactory & factory) IPolygonDictionary::PointType point_type; const auto key_type = (*dict_struct.key)[0].type; const auto f64 = std::make_shared(); - const auto multi_polygon_array = DataTypeArray(DataTypeArray(DataTypeArray(DataTypeArray(f64)))); - const auto multi_polygon_tuple = DataTypeArray(DataTypeArray(DataTypeArray(std::make_shared(std::vector{f64, f64})))); - const auto simple_polygon_array = DataTypeArray(DataTypeArray(f64)); + const auto multi_polygon_array = DataTypeArray(std::make_shared(std::make_shared(std::make_shared(f64)))); + const auto multi_polygon_tuple = DataTypeArray(std::make_shared(std::make_shared(std::make_shared(std::vector{f64, f64})))); + const auto simple_polygon_array = DataTypeArray(std::make_shared(f64)); const auto simple_polygon_tuple = DataTypeArray(std::make_shared(std::vector{f64, f64})); if (key_type->equals(multi_polygon_array)) { From 0482ab8eaa0ad6cf6258bdbf78b8ba3382defffa Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 30 Jan 2020 19:35:33 +0300 Subject: [PATCH 74/76] Address 
comment --- dbms/src/Dictionaries/PolygonDictionary.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index d820b70f25a..c7fa1a4b8a1 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -454,8 +454,14 @@ inline void makeDifferences(IColumn::Offsets & values) struct Offset { Offset() = default; - IColumn::Offsets ring_sizes, polygon_sizes, multi_polygon_sizes; - IColumn::Offset current_ring = 0, current_polygon = 0, current_multi_polygon = 0; + + IColumn::Offsets ring_sizes; + IColumn::Offsets polygon_sizes; + IColumn::Offsets multi_polygon_sizes; + + IColumn::Offset current_ring = 0; + IColumn::Offset current_polygon = 0; + IColumn::Offset current_multi_polygon = 0; }; struct Data From 1f97500756dcd2d72d820fb2736de32e1b0ef0da Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Thu, 30 Jan 2020 19:41:58 +0300 Subject: [PATCH 75/76] Address comment --- dbms/src/Dictionaries/PolygonDictionary.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index c7fa1a4b8a1..e7d4e8a625b 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -581,10 +581,12 @@ void getPointsReprByArrays(const IColumn * column, std::vectorgetOffsets(); + IColumn::Offset prev_offset = 0; for (size_t i = 0; i < offsets.size(); ++i) { - if (offsets[i] - (i == 0 ? 
0 : offsets[i - 1]) != 2) + if (offsets[i] - prev_offset != 2) throw Exception{"All points should be two-dimensional", ErrorCodes::BAD_ARGUMENTS}; + prev_offset = offsets[i]; dest.emplace_back(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1)); } } From 0ae0e03d52947ca4a8a7295016d9ef24402b698d Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Wed, 5 Feb 2020 19:38:03 +0300 Subject: [PATCH 76/76] Address latest comments --- dbms/src/Dictionaries/PolygonDictionary.cpp | 174 +++++++++----------- 1 file changed, 78 insertions(+), 96 deletions(-) diff --git a/dbms/src/Dictionaries/PolygonDictionary.cpp b/dbms/src/Dictionaries/PolygonDictionary.cpp index e7d4e8a625b..0c763e23db4 100644 --- a/dbms/src/Dictionaries/PolygonDictionary.cpp +++ b/dbms/src/Dictionaries/PolygonDictionary.cpp @@ -7,6 +7,8 @@ #include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" +#include + namespace DB { @@ -445,23 +447,52 @@ void IPolygonDictionary::getItemsImpl( namespace { -inline void makeDifferences(IColumn::Offsets & values) -{ - for (size_t i = 1; i < values.size(); ++i) - values[i] -= values[i - 1]; -} - struct Offset { Offset() = default; - IColumn::Offsets ring_sizes; - IColumn::Offsets polygon_sizes; - IColumn::Offsets multi_polygon_sizes; + IColumn::Offsets ring_offsets; + IColumn::Offsets polygon_offsets; + IColumn::Offsets multi_polygon_offsets; + IColumn::Offset points_added = 0; IColumn::Offset current_ring = 0; IColumn::Offset current_polygon = 0; IColumn::Offset current_multi_polygon = 0; + + Offset& operator++() + { + ++points_added; + if (points_added <= ring_offsets[current_ring]) + return *this; + + ++current_ring; + if (current_ring < polygon_offsets[current_polygon]) + return *this; + + ++current_polygon; + if (current_polygon < multi_polygon_offsets[current_multi_polygon]) + return *this; + + ++current_multi_polygon; + return *this; + } + + bool atLastPolygonOfMultiPolygon() { return current_polygon + 1 == 
multi_polygon_offsets[current_multi_polygon]; } + bool atLastRingOfPolygon() { return current_ring + 1 == polygon_offsets[current_polygon]; } + bool atLastPointOfRing() { return points_added == ring_offsets[current_ring]; } + + bool allRingsHaveAPositiveArea() + { + IColumn::Offset prev_offset = 0; + for (const auto offset : ring_offsets) + { + if (offset - prev_offset < 3) + return false; + prev_offset = offset; + } + return true; + } }; struct Data @@ -474,72 +505,32 @@ struct Data dest.emplace_back(); ids.push_back((ids.empty() ? 0 : ids.back() + new_multi_polygon)); } + + void addPoint(Float64 x, Float64 y) + { + auto & last_polygon = dest.back(); + auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back()); + last_ring.emplace_back(x, y); + } }; -void addNewMultiPolygon(Data &, Offset & offset) +void addNewPoint(Float64 x, Float64 y, Data & data, Offset & offset) { - ++offset.current_multi_polygon; - if (offset.current_multi_polygon == offset.multi_polygon_sizes.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; -} - -void addNewPolygon(Data & data, Offset & offset) -{ - ++offset.current_polygon; - if (offset.current_polygon == offset.polygon_sizes.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - - if (--offset.multi_polygon_sizes[offset.current_multi_polygon] == 0) + if (offset.atLastPointOfRing()) { - addNewMultiPolygon(data, offset); - data.addPolygon(true); + if (offset.atLastRingOfPolygon()) + data.addPolygon(offset.atLastPolygonOfMultiPolygon()); + else + { + /** An outer ring is added automatically with a new polygon, thus we need the else statement here. + * This also implies that if we are at this point we have to add an inner ring. 
+ */ + auto & last_polygon = data.dest.back(); + last_polygon.inners().emplace_back(); + } } - else - data.addPolygon(); -} - -void addNewRing(Data & data, Offset & offset) -{ - ++offset.current_ring; - if (offset.current_ring == offset.ring_sizes.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - - if (--offset.polygon_sizes[offset.current_polygon] == 0) - addNewPolygon(data, offset); - else - { - /** An outer ring is added automatically with a new polygon, thus we need the else statement here. - * This also implies that if we are at this point we have to add an inner ring. - */ - auto & last_polygon = data.dest.back(); - last_polygon.inners().emplace_back(); - } -} - -void addNewPointFromMultiPolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset) -{ - if (offset.ring_sizes[offset.current_ring] == 0) - addNewRing(data, offset); - - auto & last_polygon = data.dest.back(); - auto & last_ring = (last_polygon.inners().empty() ? last_polygon.outer() : last_polygon.inners().back()); - last_ring.push_back(pt); - - --offset.ring_sizes[offset.current_ring]; -} - -void addNewPointFromSimplePolygon(const IPolygonDictionary::Point & pt, Data & data, Offset & offset) -{ - if (offset.polygon_sizes[offset.current_polygon] == 0) - { - ++offset.current_polygon; - if (offset.current_polygon == offset.polygon_sizes.size()) - throw Exception{"Incorrect polygon formatting", ErrorCodes::BAD_ARGUMENTS}; - - } - auto & last_polygon = data.dest.back(); - last_polygon.outer().push_back(pt); - --offset.polygon_sizes[offset.current_polygon]; + data.addPoint(x, y); + ++offset; } const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset) @@ -547,20 +538,17 @@ const IColumn * unrollMultiPolygons(const ColumnPtr & column, Offset & offset) const auto ptr_multi_polygons = typeid_cast(column.get()); if (!ptr_multi_polygons) throw Exception{"Expected a column containing arrays of polygons", ErrorCodes::TYPE_MISMATCH}; - 
offset.multi_polygon_sizes.assign(ptr_multi_polygons->getOffsets()); - makeDifferences(offset.multi_polygon_sizes); + offset.multi_polygon_offsets.assign(ptr_multi_polygons->getOffsets()); const auto ptr_polygons = typeid_cast(&ptr_multi_polygons->getData()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of rings when reading polygons", ErrorCodes::TYPE_MISMATCH}; - offset.polygon_sizes.assign(ptr_polygons->getOffsets()); - makeDifferences(offset.polygon_sizes); + offset.polygon_offsets.assign(ptr_polygons->getOffsets()); const auto ptr_rings = typeid_cast(&ptr_polygons->getData()); if (!ptr_rings) throw Exception{"Expected a column containing arrays of points when reading rings", ErrorCodes::TYPE_MISMATCH}; - offset.ring_sizes.assign(ptr_rings->getOffsets()); - makeDifferences(offset.ring_sizes); + offset.ring_offsets.assign(ptr_rings->getOffsets()); return ptr_rings->getDataPtr().get(); } @@ -570,11 +558,15 @@ const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset) const auto ptr_polygons = typeid_cast(column.get()); if (!ptr_polygons) throw Exception{"Expected a column containing arrays of points", ErrorCodes::TYPE_MISMATCH}; - offset.polygon_sizes.assign(ptr_polygons->getOffsets()); + offset.ring_offsets.assign(ptr_polygons->getOffsets()); + offset.polygon_offsets.resize(offset.ring_offsets.size()); + std::iota(offset.polygon_offsets.begin(), offset.polygon_offsets.end(), 1); + offset.multi_polygon_offsets.assign(offset.polygon_offsets); + return ptr_polygons->getDataPtr().get(); } -void getPointsReprByArrays(const IColumn * column, std::vector & dest) +void handlePointsReprByArrays(const IColumn * column, Data & data, Offset & offset) { const auto ptr_points = typeid_cast(column); const auto ptr_coord = typeid_cast*>(&ptr_points->getData()); @@ -587,11 +579,11 @@ void getPointsReprByArrays(const IColumn * column, std::vectorgetElement(2 * i), ptr_coord->getElement(2 * i + 1)); + addNewPoint(ptr_coord->getElement(2 * i), ptr_coord->getElement(2 * i + 1), data, offset); } 
} -void getPointReprByTuples(const IColumn * column, std::vector & dest) +void handlePointsReprByTuples(const IColumn * column, Data & data, Offset & offset) { const auto ptr_points = typeid_cast(column); if (!ptr_points) @@ -604,7 +595,7 @@ void getPointReprByTuples(const IColumn * column, std::vectorsize(); ++i) { - dest.emplace_back(column_x->getElement(i), column_y->getElement(i)); + addNewPoint(column_x->getElement(i), column_y->getElement(i), data, offset); } } @@ -626,31 +617,22 @@ void IPolygonDictionary::extractPolygons(const ColumnPtr &column) break; } + if (!offset.allRingsHaveAPositiveArea()) + throw Exception{"Every ring included in a polygon or excluded from it should contain at least 3 points", + ErrorCodes::BAD_ARGUMENTS}; + /** Adding the first empty polygon */ data.addPolygon(true); - std::vector points; switch (point_type) { case PointType::Array: - getPointsReprByArrays(points_collection, points); + handlePointsReprByArrays(points_collection, data, offset); break; case PointType::Tuple: - getPointReprByTuples(points_collection, points); + handlePointsReprByTuples(points_collection, data, offset); break; } - for (auto & point : points) - { - switch (input_type) - { - case InputType::MultiPolygon: - addNewPointFromMultiPolygon(point, data, offset); - break; - case InputType::SimplePolygon: - addNewPointFromSimplePolygon(point, data, offset); - break; - } - } } SimplePolygonDictionary::SimplePolygonDictionary(