diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 1dbca428ab2..b8a0737c4b5 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -463,6 +463,7 @@ namespace ErrorCodes extern const int DICTIONARY_ALREADY_EXISTS = 486; extern const int UNKNOWN_DICTIONARY = 487; extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 488; + extern const int INCORRECT_DICTIONARY_DEFINITION = 489; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp new file mode 100644 index 00000000000..a0afd91d2fa --- /dev/null +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int INCORRECT_DICTIONARY_DEFINITION; +} + +/// There is a lot of code here, but it is simple and straightforward: +/// we just convert a dictionary definition AST into an XML configuration document. +namespace +{ +/** Strips one pair of enclosing single quotes from the string, if present. + * No escape sequences inside the string are processed. + */ +String unescapeString(const String & string) +{ + if (!string.empty() && string.front() == '\'' && string.back() == '\'') + return string.substr(1, string.size() - 2); + return string; +} + + +using namespace Poco; +using namespace Poco::XML; +/* + * Transforms next definition + * LIFETIME(MIN 10, MAX 100) + * to the next configuration + * <lifetime> + * <min>10</min> + * <max>100</max> + * </lifetime> + */ +void buildLifetimeConfiguration( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryLifetime * lifetime) +{ + + AutoPtr lifetime_element(doc->createElement("lifetime")); + AutoPtr min_element(doc->createElement("min")); + AutoPtr max_element(doc->createElement("max")); + AutoPtr min_sec(doc->createTextNode(toString(lifetime->min_sec))); + min_element->appendChild(min_sec); + AutoPtr max_sec(doc->createTextNode(toString(lifetime->max_sec))); +
max_element->appendChild(max_sec); + lifetime_element->appendChild(min_element); + lifetime_element->appendChild(max_element); + root->appendChild(lifetime_element); +} + +/* + * Transforms next definition + * LAYOUT(FLAT()) + * to the next configuration + * <layout> + * <flat/> + * </layout> + * And next definition + * LAYOUT(CACHE(SIZE_IN_CELLS 1000)) + * to the next one + * <layout> + * <cache> + * <size_in_cells>1000</size_in_cells> + * </cache> + * </layout> + * The element names are taken as-is from layout_type and from the parameter name. + */ +void buildLayoutConfiguration( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryLayout * layout) +{ + AutoPtr layout_element(doc->createElement("layout")); + root->appendChild(layout_element); + AutoPtr layout_type_element(doc->createElement(layout->layout_type)); + layout_element->appendChild(layout_type_element); + if (layout->parameter.has_value()) + { + const auto & param = layout->parameter; + AutoPtr layout_type_parameter_element(doc->createElement(param->first)); + const ASTLiteral & literal = param->second->as(); + AutoPtr value(doc->createTextNode(toString(literal.value.get()))); + layout_type_parameter_element->appendChild(value); + layout_type_element->appendChild(layout_type_parameter_element); + } +} + +/* + * Transforms next definition + * RANGE(MIN StartDate, MAX EndDate) + * to the next configuration + * <range_min><name>StartDate</name></range_min> + * <range_max><name>EndDate</name></range_max> + */ +void buildRangeConfiguration(AutoPtr doc, AutoPtr root, const ASTDictionaryRange * range) +{ + // appends an element called key, holding a name child with the given text, to root + auto appendElem = [&doc, &root](const std::string & key, const std::string & value) { + AutoPtr element(doc->createElement(key)); + AutoPtr name(doc->createElement("name")); + AutoPtr text(doc->createTextNode(value)); + name->appendChild(text); + element->appendChild(name); + root->appendChild(element); + }; + + appendElem("range_min", range->min_attr_name); + appendElem("range_max", range->max_attr_name); +} + + +/// Collect the primary key column names, in declaration order, from the PRIMARY KEY expression list +Names getPrimaryKeyColumns(const ASTExpressionList * primary_key) +{ + Names result; + const auto & children = primary_key->children; + + for (size_t index = 0; index
!= children.size(); ++index) + { + const ASTIdentifier * key_part = children[index]->as(); /* NOTE(review): as() may yield nullptr if a key part is not a plain identifier, and key_part is dereferenced unchecked on the next line - confirm the parser guarantees identifiers here */ + result.push_back(key_part->name); + } + return result; +} + +/** + * Transforms single dictionary attribute to configuration + * third_column UInt8 DEFAULT 2 EXPRESSION rand() % 100 * 77 + * to + * <attribute> + * <name>third_column</name> + * <type>UInt8</type> + * <null_value>2</null_value> + * <expression>(rand() % 100) * 77</expression> + * </attribute> + * The null_value element is always written (empty when there is no DEFAULT); + * hierarchical, injective and is_object_id elements with the text true are added only when the flag is set. + */ +void buildSingleAttribute( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryAttributeDeclaration * dict_attr) +{ + AutoPtr attribute_element(doc->createElement("attribute")); + root->appendChild(attribute_element); + + AutoPtr name_element(doc->createElement("name")); + AutoPtr name(doc->createTextNode(dict_attr->name)); + name_element->appendChild(name); + attribute_element->appendChild(name_element); + + AutoPtr type_element(doc->createElement("type")); + AutoPtr type(doc->createTextNode(queryToString(dict_attr->type))); + type_element->appendChild(type); + attribute_element->appendChild(type_element); + + AutoPtr null_value_element(doc->createElement("null_value")); + String null_value_str; + if (dict_attr->default_value) + null_value_str = queryToString(dict_attr->default_value); + AutoPtr null_value(doc->createTextNode(null_value_str)); + null_value_element->appendChild(null_value); + attribute_element->appendChild(null_value_element); + + if (dict_attr->expression != nullptr) + { + AutoPtr expression_element(doc->createElement("expression")); + AutoPtr expression(doc->createTextNode(queryToString(dict_attr->expression))); + expression_element->appendChild(expression); + attribute_element->appendChild(expression_element); + } + + if (dict_attr->hierarchical) + { + AutoPtr hierarchical_element(doc->createElement("hierarchical")); + AutoPtr hierarchical(doc->createTextNode("true")); + hierarchical_element->appendChild(hierarchical); + attribute_element->appendChild(hierarchical_element); + } + + if (dict_attr->injective) + { + AutoPtr injective_element(doc->createElement("injective")); + AutoPtr
injective(doc->createTextNode("true")); + injective_element->appendChild(injective); + attribute_element->appendChild(injective_element); + } + + if (dict_attr->is_object_id) + { + AutoPtr is_object_id_element(doc->createElement("is_object_id")); + AutoPtr is_object_id(doc->createTextNode("true")); + is_object_id_element->appendChild(is_object_id); + attribute_element->appendChild(is_object_id_element); + } +} + + +/** + * Transforms + * PRIMARY KEY Attr1 ,..., AttrN + * to the next configuration + * Attr1 + * or + * + * + * Attr1 + * UInt8 + * + * ... + * fe + * + * + */ +void buildPrimaryKeyConfiguration( + AutoPtr doc, + AutoPtr root, + bool complex, + const Names & key_names, + const ASTExpressionList * dictionary_attributes) +{ + if (!complex) + { + if (key_names.size() != 1) + throw Exception("Primary key for simple dictionary must contain exactly one element", + ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + AutoPtr id_element(doc->createElement("id")); + root->appendChild(id_element); + AutoPtr name_element(doc->createElement("name")); + id_element->appendChild(name_element); + AutoPtr name(doc->createTextNode(*key_names.begin())); + name_element->appendChild(name); + } + else + { + const auto & children = dictionary_attributes->children; + if (children.size() < key_names.size()) + throw Exception( + "Primary key fields count is more, than dictionary attributes count.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + AutoPtr key_element(doc->createElement("key")); + root->appendChild(key_element); + for (const auto & key_name : key_names) + { + bool found = false; + for (const auto & attr : children) + { + const ASTDictionaryAttributeDeclaration * dict_attr = attr->as(); + if (dict_attr->name == key_name) + { + found = true; + buildSingleAttribute(doc, key_element, dict_attr); + break; + } + } + } + } +} + + +/** + * Transforms list of ASTDictionaryAttributeDeclarations to list of dictionary attributes + */ +void buildDictionaryAttributesConfiguration( 
+ AutoPtr doc, + AutoPtr root, + const ASTExpressionList * dictionary_attributes, + const Names & key_columns) +{ + const auto & children = dictionary_attributes->children; + for (size_t i = 0; i < children.size(); ++i) + { + const ASTDictionaryAttributeDeclaration * dict_attr = children[i]->as(); + if (!dict_attr->type) + throw Exception("Dictionary attribute must has type", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + /* Primary key columns are skipped here; they are described by buildPrimaryKeyConfiguration. */ + if (std::find(key_columns.begin(), key_columns.end(), dict_attr->name) == key_columns.end()) + buildSingleAttribute(doc, root, dict_attr); + + } +} + +/** Transform function with key-value arguments to configuration + * (used for source transformation). + * Every ASTPair becomes an element named after the pair key: + * an identifier value is written as its name, + * a literal value as its string form with enclosing single quotes stripped, + * and a nested list is converted recursively into child elements. + * Any other kind of value throws INCORRECT_DICTIONARY_DEFINITION. + */ +void buildConfigurationFromFunctionWithKeyValueArguments( + AutoPtr doc, + AutoPtr root, + const ASTExpressionList * ast_expr_list) +{ + const auto & children = ast_expr_list->children; + for (size_t i = 0; i != children.size(); ++i) + { + const ASTPair * pair = children[i]->as(); + AutoPtr current_xml_element(doc->createElement(pair->first)); + root->appendChild(current_xml_element); + + if (auto identifier = pair->second->as(); identifier) + { + AutoPtr value(doc->createTextNode(identifier->name)); + current_xml_element->appendChild(value); + } + else if (auto literal = pair->second->as(); literal) + { + String str_literal = applyVisitor(FieldVisitorToString(), literal->value); + AutoPtr value(doc->createTextNode(unescapeString(str_literal))); + current_xml_element->appendChild(value); + } + else if (auto list = pair->second->as(); list) + { + buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list); + } + else + { + throw Exception( + "Incorrect ASTPair contains wrong value, should be literal, identifier or list", + ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + } + } +} + +/** Build source definition from ast. + * SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA(HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) + * to + * + * + * localhost + * ...
+ * + * 127.0.0.1 + * ... + * + * + * + */ +void buildSourceConfiguration(AutoPtr doc, AutoPtr root, const ASTFunctionWithKeyValueArguments * source) +{ + AutoPtr outer_element(doc->createElement("source")); + root->appendChild(outer_element); + AutoPtr source_element(doc->createElement(source->name)); + outer_element->appendChild(source_element); + buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as()); +} +/** Validates that the CREATE query describes a dictionary and has every mandatory section: + * attributes list, layout, lifetime, primary key and source. + * Throws INCORRECT_DICTIONARY_DEFINITION for the first missing part. + */ +void checkAST(const ASTCreateQuery & query) +{ + if (!query.is_dictionary || query.dictionary == nullptr) + throw Exception("Cannot convert dictionary to configuration from non-dictionary AST.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary_attributes_list == nullptr || query.dictionary_attributes_list->children.empty()) + throw Exception("Dictionary AST missing attributes list.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->layout == nullptr) + throw Exception("Cannot create dictionary with empty layout.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->lifetime == nullptr) + throw Exception("Dictionary AST missing lifetime section", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->primary_key == nullptr) + throw Exception("Dictionary AST missing primary key", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->source == nullptr) + throw Exception("Dictionary AST missing source", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + /// The RANGE section is optional, so its absence is not an error +} + +} + +/** Converts a CREATE DICTIONARY AST into an XML-backed configuration. + * The document is dictionaries / dictionary with name (database.table), structure + * (attributes first, then id or key), layout, source, lifetime and, when present, range. + */ +DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query) +{ + + checkAST(query); + + AutoPtr xml_document(new Poco::XML::Document()); + AutoPtr document_root(xml_document->createElement("dictionaries")); + xml_document->appendChild(document_root); + AutoPtr current_dictionary(xml_document->createElement("dictionary")); + document_root->appendChild(current_dictionary); + AutoPtr conf(new
Poco::Util::XMLConfiguration()); + + AutoPtr name_element(xml_document->createElement("name")); + current_dictionary->appendChild(name_element); + AutoPtr name(xml_document->createTextNode(query.database + "." + query.table)); + name_element->appendChild(name); + + AutoPtr structure_element(xml_document->createElement("structure")); + current_dictionary->appendChild(structure_element); + Names pk_columns = getPrimaryKeyColumns(query.dictionary->primary_key); + auto dictionary_layout = query.dictionary->layout; + /* A layout type starting with the word complex gets a composite key element; any other layout gets a single id element. */ + bool complex = startsWith(dictionary_layout->layout_type, "complex"); + + buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_columns); + + buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_columns, query.dictionary_attributes_list); + + buildLayoutConfiguration(xml_document, current_dictionary, dictionary_layout); + buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source); + buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime); + + if (query.dictionary->range) + buildRangeConfiguration(xml_document, current_dictionary, query.dictionary->range); + + conf->load(xml_document); + return conf; +} + +} diff --git a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp new file mode 100644 index 00000000000..b2fbdf70479 --- /dev/null +++ b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace DB; + +/// Debug helper: saves the configuration to a string (assumes config is a Poco::Util::XMLConfiguration; the cast result is not checked) +std::string configurationToString(const DictionaryConfigurationPtr & config) +{ + const Poco::Util::XMLConfiguration * xml_config = dynamic_cast(config.get()); + std::ostringstream oss; + xml_config->save(oss); + return oss.str();
+} +/** Single-column key with a flat layout: checks the name, lifetime, range, source and structure sections. + */ +TEST(ConvertDictionaryAST, SimpleDictConfiguration) +{ + String input = " CREATE DICTIONARY test.dict1" + " (" + " key_column UInt64 DEFAULT 0," + " second_column UInt8 DEFAULT 1," + " third_column UInt8 DEFAULT 2" + " )" + " PRIMARY KEY key_column" + " SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'test' TABLE 'table_for_dict'))" + " LAYOUT(FLAT())" + " LIFETIME(MIN 1 MAX 10)" + " RANGE(MIN second_column MAX third_column)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + /// name is database.table + EXPECT_EQ(config->getString("dictionary.name"), "test.dict1"); + + /// lifetime + EXPECT_EQ(config->getInt("dictionary.lifetime.min"), 1); + EXPECT_EQ(config->getInt("dictionary.lifetime.max"), 10); + + /// range + EXPECT_EQ(config->getString("dictionary.range_min"), "second_column"); + EXPECT_EQ(config->getString("dictionary.range_max"), "third_column"); + + /// source + EXPECT_EQ(config->getString("dictionary.source.clickhouse.host"), "localhost"); + EXPECT_EQ(config->getInt("dictionary.source.clickhouse.port"), 9000); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.user"), "default"); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.password"), ""); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.db"), "test"); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.table"), "table_for_dict"); + + /// attributes and key: the two non-key attributes come first, then the id element of the key column + Poco::Util::AbstractConfiguration::Keys keys; + config->keys("dictionary.structure", keys); + + EXPECT_EQ(keys.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".name"), "second_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure."
+ keys[0] + ".null_value"), 1); + + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure." + keys[1] + ".null_value"), 2); + + EXPECT_EQ(keys[2], "id"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[2] + ".name"), "key_column"); + + /// layout: FLAT() must produce a flat element under layout + EXPECT_TRUE(config->has("dictionary.layout.flat")); +} + +/** Attribute modifiers: HIERARCHICAL and INJECTIVE flags, a missing DEFAULT (empty null_value) + * and an EXPRESSION must be carried over to the attribute configuration. + */ +TEST(ConvertDictionaryAST, TrickyAttributes) +{ + String input = " CREATE DICTIONARY dict2" + " (" + " key_column UInt64 IS_OBJECT_ID," + " second_column UInt8 HIERARCHICAL INJECTIVE," + " third_column UInt8 DEFAULT 2 EXPRESSION rand() % 100 * 77" + " )" + " PRIMARY KEY key_column" + " LAYOUT(hashed())" + " LIFETIME(MIN 1 MAX 10)" + " SOURCE(CLICKHOUSE(HOST 'localhost'))"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + Poco::Util::AbstractConfiguration::Keys keys; + config->keys("dictionary.structure", keys); + + EXPECT_EQ(keys.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".name"), "second_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".type"), "UInt8"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".null_value"), ""); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".hierarchical"), "true"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".injective"), "true"); + + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure."
+ keys[1] + ".null_value"), 2); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".expression"), "(rand() % 100) * 77"); + + EXPECT_EQ(keys[2], "id"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[2] + ".name"), "key_column"); +} + +/** Composite key with a parameterized layout: a complex layout must yield a key element holding + * one attribute per key column, and the layout parameter must become a child of the layout type element. + */ +TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) +{ + String input = " CREATE DICTIONARY dict4" + " (" + " key_column1 String," + " key_column2 UInt64," + " third_column UInt8," + " fourth_column UInt8" + " )" + " PRIMARY KEY key_column1, key_column2" + " SOURCE(MYSQL())" + " LAYOUT(COMPLEX_KEY_CACHE(size_in_cells 50))" + " LIFETIME(MIN 1 MAX 10)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + Poco::Util::AbstractConfiguration::Keys keys; + config->keys("dictionary.structure.key", keys); + + EXPECT_EQ(keys.size(), 2); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[0] + ".name"), "key_column1"); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[0] + ".type"), "String"); + + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[1] + ".name"), "key_column2"); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[1] + ".type"), "UInt64"); + + Poco::Util::AbstractConfiguration::Keys attrs; + config->keys("dictionary.structure", attrs); + + EXPECT_EQ(attrs.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + attrs[0] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." + attrs[0] + ".type"), "UInt8"); + + EXPECT_EQ(config->getString("dictionary.structure." + attrs[1] + ".name"), "fourth_column"); + EXPECT_EQ(config->getString("dictionary.structure."
+ attrs[1] + ".type"), "UInt8"); + + EXPECT_EQ(attrs[2], "key"); + + EXPECT_EQ(config->getInt("dictionary.layout.complex_key_cache.size_in_cells"), 50); +} + +/** Source with a nested key-value list: REPLICA(...) must become a nested replica element under the source type. + */ +TEST(ConvertDictionaryAST, ComplexSource) +{ + String input = " CREATE DICTIONARY dict4" + " (" + " key_column UInt64," + " second_column UInt8," + " third_column UInt8" + " )" + " PRIMARY KEY key_column" + " SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA(HOST '127.0.0.1' PRIORITY 1) PASSWORD ''))" + " LAYOUT(CACHE(size_in_cells 50))" + " LIFETIME(MIN 1 MAX 10)" + " RANGE(MIN second_column MAX third_column)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + /// source: top-level pairs and the nested replica list + EXPECT_EQ(config->getString("dictionary.source.mysql.host"), "localhost"); + EXPECT_EQ(config->getInt("dictionary.source.mysql.port"), 9000); + EXPECT_EQ(config->getString("dictionary.source.mysql.user"), "default"); + EXPECT_EQ(config->getString("dictionary.source.mysql.password"), ""); + EXPECT_EQ(config->getString("dictionary.source.mysql.replica.host"), "127.0.0.1"); + EXPECT_EQ(config->getInt("dictionary.source.mysql.replica.priority"), 1); +}