Dictionaries: allow specifying bidirectional for a hierarchical attribute

This commit is contained in:
Maksim Kita 2022-05-12 13:20:27 +02:00
parent 100afa8bcf
commit 1142e05683
12 changed files with 98 additions and 5 deletions

View File

@ -252,7 +252,7 @@ Strings DictionaryStructure::getKeysNames() const
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
{
static const std::unordered_set<std::string_view> valid_keys
= {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
= {"name", "type", "expression", "null_value", "hierarchical", "bidirectional", "injective", "is_object_id"};
for (const auto & key : keys)
{
@ -350,6 +350,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
}
const auto hierarchical = config.getBool(prefix + "hierarchical", false);
const auto bidirectional = config.getBool(prefix + "bidirectional", false);
const auto injective = config.getBool(prefix + "injective", false);
const auto is_object_id = config.getBool(prefix + "is_object_id", false);
@ -362,6 +363,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
if (has_hierarchy && hierarchical)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one hierarchical attribute supported");
if (bidirectional && !hierarchical)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bidirectional can only be applied to hierarchical attributes");
has_hierarchy = has_hierarchy || hierarchical;
res_attributes.emplace_back(DictionaryAttribute{
@ -372,6 +376,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
expression,
null_value,
hierarchical,
bidirectional,
injective,
is_object_id,
is_nullable});

View File

@ -67,6 +67,7 @@ struct DictionaryAttribute final
const std::string expression;
const Field null_value;
const bool hierarchical;
const bool bidirectional;
const bool injective;
const bool is_object_id;
const bool is_nullable;

View File

@ -43,6 +43,7 @@ FlatDictionary::FlatDictionary(
{
createAttributes();
loadData();
buildHierarchyParentToChildIndexIfNeeded();
calculateBytesAllocated();
}
@ -246,6 +247,9 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChildIndex() const
{
if (hierarchy_parent_to_child_index)
return hierarchy_parent_to_child_index;
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
@ -406,6 +410,15 @@ void FlatDictionary::loadData()
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName());
}
/// Stores the parent-to-child hierarchy index in `hierarchy_parent_to_child_index`
/// when the dictionary has a hierarchical attribute and that attribute is marked
/// bidirectional. In every other case the member is left untouched.
void FlatDictionary::buildHierarchyParentToChildIndexIfNeeded()
{
    const auto & hierarchical_index = dict_struct.hierarchical_attribute_index;

    /// The index is only looked up in `attributes` once we know a hierarchical attribute exists.
    if (hierarchical_index && dict_struct.attributes[*hierarchical_index].bidirectional)
        hierarchy_parent_to_child_index = getHierarchyParentToChildIndex();
}
void FlatDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
@ -445,6 +458,9 @@ void FlatDictionary::calculateBytesAllocated()
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
if (hierarchy_parent_to_child_index)
bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes();
bytes_allocated += string_arena.size();
}

View File

@ -140,10 +140,15 @@ private:
};
void createAttributes();
void blockToAttributes(const Block & block);
void updateData();
void loadData();
void buildHierarchyParentToChildIndexIfNeeded();
void calculateBytesAllocated();
Attribute createAttribute(const DictionaryAttribute & attribute);
@ -175,6 +180,7 @@ private:
BlockPtr update_field_loaded_block;
Arena string_arena;
DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index;
};
}

View File

@ -37,6 +37,7 @@ HashedArrayDictionary<dictionary_key_type>::HashedArrayDictionary(
{
createAttributes();
loadData();
buildHierarchyParentToChildIndexIfNeeded();
calculateBytesAllocated();
}
@ -286,6 +287,9 @@ DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary<dictionary_key_ty
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (hierarchy_parent_to_child_index)
return hierarchy_parent_to_child_index;
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
@ -707,6 +711,16 @@ void HashedArrayDictionary<dictionary_key_type>::loadData()
getFullName());
}
/// Stores the parent-to-child hierarchy index in `hierarchy_parent_to_child_index`
/// when the dictionary has a hierarchical attribute and that attribute is marked
/// bidirectional. In every other case the member is left untouched.
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::buildHierarchyParentToChildIndexIfNeeded()
{
    const auto & hierarchical_index = dict_struct.hierarchical_attribute_index;

    /// The index is only looked up in `attributes` once we know a hierarchical attribute exists.
    if (hierarchical_index && dict_struct.attributes[*hierarchical_index].bidirectional)
        hierarchy_parent_to_child_index = getHierarchyParentToChildIndex();
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
{
@ -744,10 +758,16 @@ void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
bytes_allocated += (*attribute.is_index_null).size();
}
bytes_allocated += string_arena.size();
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
/// Include the size of the parent-to-child hierarchy index when one has been stored.
/// No diagnostic output here: this code must not write to stdout.
if (hierarchy_parent_to_child_index)
    bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes();
bytes_allocated += string_arena.size();
}
template <DictionaryKeyType dictionary_key_type>

View File

@ -176,6 +176,8 @@ private:
void loadData();
void buildHierarchyParentToChildIndexIfNeeded();
void calculateBytesAllocated();
template <typename KeysProvider>
@ -224,6 +226,7 @@ private:
BlockPtr update_field_loaded_block;
Arena string_arena;
DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index;
};
extern template class HashedArrayDictionary<DictionaryKeyType::Simple>;

View File

@ -54,6 +54,7 @@ HashedDictionary<dictionary_key_type, sparse>::HashedDictionary(
{
createAttributes();
loadData();
buildHierarchyParentToChildIndexIfNeeded();
calculateBytesAllocated();
}
@ -322,6 +323,9 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary<dictionary_key_type, s
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (hierarchy_parent_to_child_index)
return hierarchy_parent_to_child_index;
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const CollectionType<UInt64> & parent_keys = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
@ -646,6 +650,16 @@ void HashedDictionary<dictionary_key_type, sparse>::loadData()
getFullName());
}
/// Stores the parent-to-child hierarchy index in `hierarchy_parent_to_child_index`
/// when the dictionary has a hierarchical attribute and that attribute is marked
/// bidirectional. In every other case the member is left untouched.
template <DictionaryKeyType dictionary_key_type, bool sparse>
void HashedDictionary<dictionary_key_type, sparse>::buildHierarchyParentToChildIndexIfNeeded()
{
    const auto & hierarchical_index = dict_struct.hierarchical_attribute_index;

    /// The index is only looked up in `attributes` once we know a hierarchical attribute exists.
    if (hierarchical_index && dict_struct.attributes[*hierarchical_index].bidirectional)
        hierarchy_parent_to_child_index = getHierarchyParentToChildIndex();
}
template <DictionaryKeyType dictionary_key_type, bool sparse>
void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
{
@ -699,10 +713,13 @@ void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
}
}
bytes_allocated += string_arena.size();
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
if (hierarchy_parent_to_child_index)
bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes();
bytes_allocated += string_arena.size();
}
template <DictionaryKeyType dictionary_key_type, bool sparse>

View File

@ -197,6 +197,8 @@ private:
void loadData();
void buildHierarchyParentToChildIndexIfNeeded();
void calculateBytesAllocated();
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
@ -229,6 +231,7 @@ private:
BlockPtr update_field_loaded_block;
Arena string_arena;
NoAttributesCollectionType no_attributes_container;
DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index;
};
extern template class HashedDictionary<DictionaryKeyType::Simple, false>;

View File

@ -290,6 +290,14 @@ void buildSingleAttribute(
attribute_element->appendChild(hierarchical_element);
}
if (dict_attr->bidirectional)
{
AutoPtr<Element> bidirectional_element(doc->createElement("bidirectional"));
AutoPtr<Text> bidirectional(doc->createTextNode("true"));
bidirectional_element->appendChild(bidirectional);
attribute_element->appendChild(bidirectional_element);
}
if (dict_attr->injective)
{
AutoPtr<Element> injective_element(doc->createElement("injective"));

View File

@ -58,6 +58,9 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin
if (hierarchical)
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL";
if (bidirectional)
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL";
if (injective)
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "INJECTIVE";

View File

@ -20,6 +20,8 @@ public:
ASTPtr expression;
/// Is attribute mirrored to the parent identifier
bool hierarchical;
/// Is hierarchical attribute bidirectional
bool bidirectional;
/// Flag that shows whether the id->attribute image is injective
bool injective;
/// MongoDB object ID

View File

@ -15,6 +15,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E
ParserKeyword s_default{"DEFAULT"};
ParserKeyword s_expression{"EXPRESSION"};
ParserKeyword s_hierarchical{"HIERARCHICAL"};
ParserKeyword s_bidirectional{"BIDIRECTIONAL"};
ParserKeyword s_injective{"INJECTIVE"};
ParserKeyword s_is_object_id{"IS_OBJECT_ID"};
ParserLiteral default_parser;
@ -30,6 +31,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E
ASTPtr default_value;
ASTPtr expression;
bool hierarchical = false;
bool bidirectional = false;
bool injective = false;
bool is_object_id = false;
@ -63,6 +65,12 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E
continue;
}
if (!bidirectional && s_bidirectional.ignore(pos, expected))
{
bidirectional = true;
continue;
}
if (!injective && s_injective.ignore(pos, expected))
{
injective = true;
@ -101,6 +109,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E
}
attribute_declaration->hierarchical = hierarchical;
attribute_declaration->bidirectional = bidirectional;
attribute_declaration->injective = injective;
attribute_declaration->is_object_id = is_object_id;