From 3f34c733dd67033a98326a6a2077187313dd03b0 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 27 Feb 2015 14:57:14 +0300 Subject: [PATCH] dbms: refactor FlatDictionary to use tuple and less macro-code[#METR-13298] --- .../include/DB/Dictionaries/CacheDictionary.h | 67 +++-- dbms/include/DB/Dictionaries/FlatDictionary.h | 269 ++++++++---------- .../DB/Dictionaries/HashedDictionary.h | 85 ++---- dbms/include/DB/Dictionaries/IDictionary.h | 1 + 4 files changed, 187 insertions(+), 235 deletions(-) diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index 98912027cdc..0cac0addcba 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -52,15 +52,25 @@ public: const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } - bool hasHierarchy() const override { return false; } + bool hasHierarchy() const override { return hierarchical_attribute; } - id_t toParent(const id_t id) const override { return 0; } + id_t toParent(const id_t id) const override + { + PODArray ids{1, id}; + PODArray out{1}; + getItems(*hierarchical_attribute, ids, out); + return out.front(); + } + + void toParent(const PODArray & ids, PODArray & out) const override + { + getItems(*hierarchical_attribute, ids, out); + } #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ @@ -69,7 +79,7 @@ public: \ PODArray ids{1, id};\ PODArray out{1};\ - getItems(idx, ids, out);\ + getItems(attribute, ids, out);\ return out.front();\ } DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) @@ -85,8 +95,7 @@ public: #undef DECLARE_INDIVIDUAL_GETTER String getString(const std::string & attribute_name, const id_t id) const override { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; + auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeType::string) throw Exception{ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), @@ -95,7 +104,7 @@ public: PODArray ids{1, id}; ColumnString out; - getItems(idx, ids, &out); + getItems(attribute, ids, &out); return out.getDataAt(0); } @@ -103,15 +112,14 @@ public: #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ void get##TYPE(const std::string & attribute_name, const PODArray & ids, PODArray & out) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ \ - getItems(idx, ids, out);\ + getItems(attribute, ids, out);\ } DECLARE_MULTIPLE_GETTER(UInt8, uint8) DECLARE_MULTIPLE_GETTER(UInt16, uint16) @@ -126,15 +134,14 @@ public: #undef DECLARE_MULTIPLE_GETTER void getString(const std::string & attribute_name, const PODArray & ids, ColumnString * out) const override { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; + auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeType::string) throw Exception{ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; - getItems(idx, ids, out); + getItems(attribute, ids, out); } private: @@ -176,7 +183,15 @@ private: attribute.null_value)); if (attribute.hierarchical) + { hierarchical_attribute = &attributes.back(); + + if (hierarchical_attribute->type != AttributeType::uint64) + throw Exception{ + "Hierarchical attribute must be UInt64.", + ErrorCodes::TYPE_MISMATCH + }; + } } } @@ -241,14 +256,14 @@ private: } template - void getItems(const std::size_t attribute_idx, const PODArray & ids, PODArray & out) const + void getItems(attribute_t & attribute, const PODArray & ids, PODArray & out) const { HashMap> outdated_ids; - auto & attribute = attributes[attribute_idx]; auto & attribute_array = std::get>(attribute.arrays); { const Poco::ScopedReadRWLock read_lock{rw_lock}; + /// fetch up-to-date values, decide which ones require update for (const auto i : ext::range(0, ids.size())) { @@ -289,12 +304,11 @@ private: }); } - void getItems(const std::size_t attribute_idx, const PODArray & ids, ColumnString * out) const + void getItems(attribute_t & attribute, const PODArray & ids, ColumnString * out) const { /// save on some allocations out->getOffsets().reserve(ids.size()); - auto & attribute = attributes[attribute_idx]; auto & attribute_array = std::get>(attribute.arrays); auto found_outdated_values = false; @@ -381,7 +395,7 @@ private: const auto attribute_value = attribute_array[cell_idx]; map[id] = attribute_value; - total_length += attribute_value.size + 1; + total_length += (attribute_value.size + 1) * outdated_ids[id]; }); } @@ -390,7 +404,7 @@ private: for (const auto id : ids) { const auto it = map.find(id); - const auto string = it != map.end() ? it->second : std::get(attributes[attribute_idx].null_values); + const auto string = it != map.end() ? it->second : std::get(attribute.null_values); out->insertData(string.data(), string.size()); } } @@ -414,6 +428,11 @@ private: const auto & ids = id_column->getData(); + /// cache column pointers + std::vector column_ptrs(attributes.size()); + for (const auto i : ext::range(0, attributes.size())) + column_ptrs[i] = block.getByPosition(i + 1).column.get(); + for (const auto i : ext::range(0, ids.size())) { const auto id = ids[i]; @@ -422,7 +441,7 @@ private: for (const auto attribute_idx : ext::range(0, attributes.size())) { - const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + const auto & attribute_column = *column_ptrs[attribute_idx]; auto & attribute = attributes[attribute_idx]; setAttributeValue(attribute, cell_idx, attribute_column[i]); @@ -486,7 +505,7 @@ private: } } - std::size_t getAttributeIndex(const std::string & attribute_name) const + attribute_t & getAttribute(const std::string & attribute_name) const { const auto it = attribute_index_by_name.find(attribute_name); if (it == std::end(attribute_index_by_name)) @@ -495,7 +514,7 @@ private: ErrorCodes::BAD_ARGUMENTS }; - return it->second; + return attributes[it->second]; } static std::size_t round_up_to_power_of_two(std::size_t n) @@ -529,7 +548,7 @@ private: std::map attribute_index_by_name; mutable std::vector attributes; mutable std::vector cells; - const attribute_t * hierarchical_attribute = nullptr; + attribute_t * hierarchical_attribute = nullptr; mutable std::mt19937_64 rnd_engine{getSeed()}; }; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 52528e9e487..0fb1a1ce740 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -46,42 +47,29 @@ public: id_t toParent(const id_t id) const override { const auto attr = hierarchical_attribute; + const auto & array = *std::get>>(attr->arrays); - switch (hierarchical_attribute->type) - { - case AttributeType::uint8: return id < attr->uint8_array->size() ? (*attr->uint8_array)[id] : attr->uint8_null_value; - case AttributeType::uint16: return id < attr->uint16_array->size() ? (*attr->uint16_array)[id] : attr->uint16_null_value; - case AttributeType::uint32: return id < attr->uint32_array->size() ? (*attr->uint32_array)[id] : attr->uint32_null_value; - case AttributeType::uint64: return id < attr->uint64_array->size() ? (*attr->uint64_array)[id] : attr->uint64_null_value; - case AttributeType::int8: return id < attr->int8_array->size() ? (*attr->int8_array)[id] : attr->int8_null_value; - case AttributeType::int16: return id < attr->int16_array->size() ? (*attr->int16_array)[id] : attr->int16_null_value; - case AttributeType::int32: return id < attr->int32_array->size() ? (*attr->int32_array)[id] : attr->int32_null_value; - case AttributeType::int64: return id < attr->int64_array->size() ? (*attr->int64_array)[id] : attr->int64_null_value; - case AttributeType::float32: - case AttributeType::float64: - case AttributeType::string: - break; - } + return id < array.size() ? array[id] : std::get(attr->null_values); + } - throw Exception{ - "Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), - ErrorCodes::TYPE_MISMATCH - }; + void toParent(const PODArray & ids, PODArray & out) const override + { + getItems(*hierarchical_attribute, ids, out); } #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + const auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ - if (id < attribute.LC_TYPE##_array->size())\ - return (*attribute.LC_TYPE##_array)[id];\ - return attribute.LC_TYPE##_null_value;\ + \ + const auto & array = *std::get>>(attribute.arrays);\ + \ + return id < array.size() ? array[id] : std::get(attribute.null_values);\ } DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) DECLARE_INDIVIDUAL_GETTER(UInt16, uint16) @@ -93,28 +81,32 @@ public: DECLARE_INDIVIDUAL_GETTER(Int64, int64) DECLARE_INDIVIDUAL_GETTER(Float32, float32) DECLARE_INDIVIDUAL_GETTER(Float64, float64) - DECLARE_INDIVIDUAL_GETTER(String, string) #undef DECLARE_INDIVIDUAL_GETTER + String getString(const std::string & attribute_name, const id_t id) const override + { + const auto & attribute = getAttribute(attribute_name); + if (attribute.type != AttributeType::string) + throw Exception{ + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH + }; + + const auto & array = *std::get>>(attribute.arrays); + + return id < array.size() ? String{array[id]} : std::get(attribute.null_values); + } #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ void get##TYPE(const std::string & attribute_name, const PODArray & ids, PODArray & out) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + const auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ \ - const auto & attr = *attribute.LC_TYPE##_array;\ - const auto null_value = attribute.LC_TYPE##_null_value;\ - \ - for (const auto i : ext::range(0, ids.size()))\ - {\ - const auto id = ids[i];\ - out[i] = id < attr.size() ? attr[id] : null_value;\ - }\ + getItems(attribute, ids, out);\ } DECLARE_MULTIPLE_GETTER(UInt8, uint8) DECLARE_MULTIPLE_GETTER(UInt16, uint16) @@ -129,16 +121,15 @@ public: #undef DECLARE_MULTIPLE_GETTER void getString(const std::string & attribute_name, const PODArray & ids, ColumnString * out) const override { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; + const auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeType::string) throw Exception{ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; - const auto & attr = *attribute.string_array; - const auto null_value = attribute.string_null_value; + const auto & attr = *std::get>>(attribute.arrays); + const auto & null_value = std::get(attribute.null_values); for (const auto i : ext::range(0, ids.size())) { @@ -149,32 +140,25 @@ public: } private: - struct attribute_t + struct attribute_t final { AttributeType type; - UInt8 uint8_null_value; - UInt16 uint16_null_value; - UInt32 uint32_null_value; - UInt64 uint64_null_value; - Int8 int8_null_value; - Int16 int16_null_value; - Int32 int32_null_value; - Int64 int64_null_value; - Float32 float32_null_value; - Float64 float64_null_value; - String string_null_value; - std::unique_ptr> uint8_array; - std::unique_ptr> uint16_array; - std::unique_ptr> uint32_array; - std::unique_ptr> uint64_array; - std::unique_ptr> int8_array; - std::unique_ptr> int16_array; - std::unique_ptr> int32_array; - std::unique_ptr> int64_array; - std::unique_ptr> float32_array; - std::unique_ptr> float64_array; + std::tuple null_values; + std::tuple>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>, + std::unique_ptr>> arrays; std::unique_ptr string_arena; - std::unique_ptr> string_array; }; void createAttributes() @@ -188,7 +172,15 @@ private: attribute.null_value)); if (attribute.hierarchical) + { hierarchical_attribute = &attributes.back(); + + if (hierarchical_attribute->type != AttributeType::uint64) + throw Exception{ + "Hierarchical attribute must be UInt64.", + ErrorCodes::TYPE_MISMATCH + }; + } } } @@ -214,73 +206,65 @@ private: stream->readSuffix(); } + template + void createAttributeImpl(attribute_t & attribute, const std::string & null_value) + { + const auto & null_value_ref = std::get(attribute.null_values) = DB::parse(null_value); + std::get>>(attribute.arrays) = + std::make_unique>(initial_array_size, null_value_ref); + } + attribute_t createAttributeWithType(const AttributeType type, const std::string & null_value) { attribute_t attr{type}; switch (type) { - case AttributeType::uint8: - attr.uint8_null_value = DB::parse(null_value); - attr.uint8_array.reset(new PODArray); - attr.uint8_array->resize_fill(initial_array_size, attr.uint8_null_value); - break; - case AttributeType::uint16: - attr.uint16_null_value = DB::parse(null_value); - attr.uint16_array.reset(new PODArray); - attr.uint16_array->resize_fill(initial_array_size, attr.uint16_null_value); - break; - case AttributeType::uint32: - attr.uint32_null_value = DB::parse(null_value); - attr.uint32_array.reset(new PODArray); - attr.uint32_array->resize_fill(initial_array_size, attr.uint32_null_value); - break; - case AttributeType::uint64: - attr.uint64_null_value = DB::parse(null_value); - attr.uint64_array.reset(new PODArray); - attr.uint64_array->resize_fill(initial_array_size, attr.uint64_null_value); - break; - case AttributeType::int8: - attr.int8_null_value = DB::parse(null_value); - attr.int8_array.reset(new PODArray); - attr.int8_array->resize_fill(initial_array_size, attr.int8_null_value); - break; - case AttributeType::int16: - attr.int16_null_value = DB::parse(null_value); - attr.int16_array.reset(new PODArray); - attr.int16_array->resize_fill(initial_array_size, attr.int16_null_value); - break; - case AttributeType::int32: - attr.int32_null_value = DB::parse(null_value); - attr.int32_array.reset(new PODArray); - attr.int32_array->resize_fill(initial_array_size, attr.int32_null_value); - break; - case AttributeType::int64: - attr.int64_null_value = DB::parse(null_value); - attr.int64_array.reset(new PODArray); - attr.int64_array->resize_fill(initial_array_size, attr.int64_null_value); - break; - case AttributeType::float32: - attr.float32_null_value = DB::parse(null_value); - attr.float32_array.reset(new PODArray); - attr.float32_array->resize_fill(initial_array_size, attr.float32_null_value); - break; - case AttributeType::float64: - attr.float64_null_value = DB::parse(null_value); - attr.float64_array.reset(new PODArray); - attr.float64_array->resize_fill(initial_array_size, attr.float64_null_value); - break; + case AttributeType::uint8: createAttributeImpl(attr, null_value); break; + case AttributeType::uint16: createAttributeImpl(attr, null_value); break; + case AttributeType::uint32: createAttributeImpl(attr, null_value); break; + case AttributeType::uint64: createAttributeImpl(attr, null_value); break; + case AttributeType::int8: createAttributeImpl(attr, null_value); break; + case AttributeType::int16: createAttributeImpl(attr, null_value); break; + case AttributeType::int32: createAttributeImpl(attr, null_value); break; + case AttributeType::int64: createAttributeImpl(attr, null_value); break; + case AttributeType::float32: createAttributeImpl(attr, null_value); break; + case AttributeType::float64: createAttributeImpl(attr, null_value); break; case AttributeType::string: - attr.string_null_value = null_value; - attr.string_arena.reset(new Arena); - attr.string_array.reset(new PODArray); - attr.string_array->resize_fill(initial_array_size, attr.string_null_value); + { + const auto & null_value_ref = std::get(attr.null_values) = DB::parse(null_value); + std::get>>(attr.arrays) = + std::make_unique>(initial_array_size, null_value_ref); + attr.string_arena = std::make_unique(); break; - } + } + }; return attr; } + template + void getItems(const attribute_t & attribute, const PODArray & ids, PODArray & out) const + { + const auto & attr = *std::get>>(attribute.arrays); + const auto null_value = std::get(attribute.null_values); + + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + out[i] = id < attr.size() ? attr[id] : null_value; + } + } + + template + void setAttributeValueImpl(attribute_t & attribute, const id_t id, const T value) + { + auto & array = *std::get>>(attribute.arrays); + if (id >= array.size()) + array.resize_fill(id, std::get(attribute.null_values)); + array[id] = value; + } + void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value) { if (id >= max_array_size) @@ -293,87 +277,68 @@ private: { case AttributeType::uint8: { - if (id >= attribute.uint8_array->size()) - attribute.uint8_array->resize_fill(id, attribute.uint8_null_value); - (*attribute.uint8_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::uint16: { - if (id >= attribute.uint16_array->size()) - attribute.uint16_array->resize_fill(id, attribute.uint16_null_value); - (*attribute.uint16_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::uint32: { - if (id >= attribute.uint32_array->size()) - attribute.uint32_array->resize_fill(id, attribute.uint32_null_value); - (*attribute.uint32_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::uint64: { - if (id >= attribute.uint64_array->size()) - attribute.uint64_array->resize_fill(id, attribute.uint64_null_value); - (*attribute.uint64_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::int8: { - if (id >= attribute.int8_array->size()) - attribute.int8_array->resize_fill(id, attribute.int8_null_value); - (*attribute.int8_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::int16: { - if (id >= attribute.int16_array->size()) - attribute.int16_array->resize_fill(id, attribute.int16_null_value); - (*attribute.int16_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::int32: { - if (id >= attribute.int32_array->size()) - attribute.int32_array->resize_fill(id, attribute.int32_null_value); - (*attribute.int32_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::int64: { - if (id >= attribute.int64_array->size()) - attribute.int64_array->resize_fill(id, attribute.int64_null_value); - (*attribute.int64_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::float32: { - if (id >= attribute.float32_array->size()) - attribute.float32_array->resize_fill(id, attribute.float32_null_value); - (*attribute.float32_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::float64: { - if (id >= attribute.float64_array->size()) - attribute.float64_array->resize_fill(id, attribute.float64_null_value); - (*attribute.float64_array)[id] = value.get(); + setAttributeValueImpl(attribute, id, value.get()); break; } case AttributeType::string: { - if (id >= attribute.string_array->size()) - attribute.string_array->resize_fill(id, attribute.string_null_value); + auto & array = *std::get>>(attribute.arrays); + if (id >= array.size()) + array.resize_fill(id, std::get(attribute.null_values)); const auto & string = value.get(); const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - (*attribute.string_array)[id] = StringRef{string_in_arena, string.size()}; + array[id] = StringRef{string_in_arena, string.size()}; break; } } } - std::size_t getAttributeIndex(const std::string & attribute_name) const + const attribute_t & getAttribute(const std::string & attribute_name) const { const auto it = attribute_index_by_name.find(attribute_name); if (it == std::end(attribute_index_by_name)) @@ -382,7 +347,7 @@ private: ErrorCodes::BAD_ARGUMENTS }; - return it->second; + return attributes[it->second]; } const std::string name; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index c46c7d94766..b80ff4e1e06 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -45,65 +45,26 @@ public: { const auto attr = hierarchical_attribute; - switch (hierarchical_attribute->type) - { - case AttributeType::uint8: - { - const auto it = attr->uint8_map->find(id); - return it != attr->uint8_map->end() ? it->second : attr->uint8_null_value; - } - case AttributeType::uint16: - { - const auto it = attr->uint16_map->find(id); - return it != attr->uint16_map->end() ? it->second : attr->uint16_null_value; - } - case AttributeType::uint32: - { - const auto it = attr->uint32_map->find(id); - return it != attr->uint32_map->end() ? it->second : attr->uint32_null_value; - } - case AttributeType::uint64: - { - const auto it = attr->uint64_map->find(id); - return it != attr->uint64_map->end() ? it->second : attr->uint64_null_value; - } - case AttributeType::int8: - { - const auto it = attr->int8_map->find(id); - return it != attr->int8_map->end() ? it->second : attr->int8_null_value; - } - case AttributeType::int16: - { - const auto it = attr->int16_map->find(id); - return it != attr->int16_map->end() ? it->second : attr->int16_null_value; - } - case AttributeType::int32: - { - const auto it = attr->int32_map->find(id); - return it != attr->int32_map->end() ? it->second : attr->int32_null_value; - } - case AttributeType::int64: - { - const auto it = attr->int64_map->find(id); - return it != attr->int64_map->end() ? it->second : attr->int64_null_value; - } - case AttributeType::float32: - case AttributeType::float64: - case AttributeType::string: - break; - }; + const auto it = attr->uint64_map->find(id); + return it != attr->uint64_map->end() ? it->second : attr->uint64_null_value; + } - throw Exception{ - "Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), - ErrorCodes::TYPE_MISMATCH - }; + void toParent(const PODArray & ids, PODArray & out) const override + { + const auto & attr = *hierarchical_attribute->uint64_map; + const auto null_value = hierarchical_attribute->uint64_null_value; + + for (const auto i : ext::range(0, ids.size())) + { + const auto it = attr.find(ids[i]); + out[i] = it != attr.end() ? it->second : null_value; + } } #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + const auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ @@ -132,8 +93,7 @@ public: #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ void get##TYPE(const std::string & attribute_name, const PODArray & ids, PODArray & out) const override\ {\ - const auto idx = getAttributeIndex(attribute_name);\ - const auto & attribute = attributes[idx];\ + const auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeType::LC_TYPE)\ throw Exception{\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ @@ -162,8 +122,7 @@ public: #undef DECLARE_MULTIPLE_GETTER void getString(const std::string & attribute_name, const PODArray & ids, ColumnString * out) const override { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; + const auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeType::string) throw Exception{ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), @@ -221,7 +180,15 @@ private: attribute.null_value)); if (attribute.hierarchical) + { hierarchical_attribute = &attributes.back(); + + if (hierarchical_attribute->type != AttributeType::uint64) + throw Exception{ + "Hierarchical attribute must be UInt64.", + ErrorCodes::TYPE_MISMATCH + }; + } } } @@ -367,7 +334,7 @@ private: }; } - std::size_t getAttributeIndex(const std::string & attribute_name) const + const attribute_t & getAttribute(const std::string & attribute_name) const { const auto it = attribute_index_by_name.find(attribute_name); if (it == std::end(attribute_index_by_name)) @@ -376,7 +343,7 @@ private: ErrorCodes::BAD_ARGUMENTS }; - return it->second; + return attributes[it->second]; } const std::string name; diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index d16d6d92c8b..1e0a8eb5e38 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -38,6 +38,7 @@ public: /// do not call unless you ensure that hasHierarchy() returns true virtual id_t toParent(id_t id) const = 0; + virtual void toParent(const PODArray & ids, PODArray & out) const = 0; bool in(id_t child_id, const id_t ancestor_id) const {