From ae90ba08a667b1668f2c0be46684e9126324f2da Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 5 Apr 2014 04:47:36 +0400 Subject: [PATCH] dbms: OLAP compatibility: added support for 'regions_point_of_view' setting [#METR-10715]. --- dbms/src/Server/OLAPQueryConverter.cpp | 96 +++++++++++++++++--------- dbms/src/Server/OLAPQueryConverter.h | 48 ++++++++++--- dbms/src/Server/OLAPQueryParser.cpp | 4 ++ dbms/src/Server/OLAPQueryParser.h | 3 + 4 files changed, 107 insertions(+), 44 deletions(-) diff --git a/dbms/src/Server/OLAPQueryConverter.cpp b/dbms/src/Server/OLAPQueryConverter.cpp index 9fb4b3b3d4b..c4e1791c03f 100644 --- a/dbms/src/Server/OLAPQueryConverter.cpp +++ b/dbms/src/Server/OLAPQueryConverter.cpp @@ -19,7 +19,7 @@ QueryConverter::QueryConverter(Poco::Util::AbstractConfiguration & config) attribute_metadatas = GetOLAPAttributeMetadata(); } -static std::string FirstWord(std::string s) +static std::string firstWord(std::string s) { for (size_t i = 0; i < s.length(); ++i) { @@ -32,8 +32,18 @@ static std::string FirstWord(std::string s) return s; } -void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, Context & inout_context, std::string & out_query) +void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, Context & inout_context, std::string & out_query) const { + /// Пустая строка, или строка вида ", 'ua'". + std::string regions_point_of_view_formatted; + + if (!query.regions_point_of_view.empty()) + { + std::stringstream tmp; + tmp << ", " << mysqlxx::quote << query.regions_point_of_view; + regions_point_of_view_formatted = tmp.str(); + } + /// Проверим, умеем ли мы выполнять такой запрос. if (query.format != FORMAT_TAB) throw Exception("Only tab-separated output format is supported", ErrorCodes::UNSUPPORTED_PARAMETER); @@ -75,22 +85,22 @@ void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, for (size_t i = 0; i < query.key_attributes.size(); ++i) { const QueryParseResult::KeyAttribute & key = query.key_attributes[i]; - std::string s = convertAttributeFormatted(key.attribute, key.parameter); + std::string s = convertAttributeFormatted(key.attribute, key.parameter, regions_point_of_view_formatted); if (i > 0) out_query += ", "; - out_query += s + " AS _" + FirstWord(key.attribute) + (key.parameter ? "_" + toString(key.parameter) : ""); + out_query += s + " AS _" + firstWord(key.attribute) + (key.parameter ? "_" + toString(key.parameter) : ""); selected_expressions.push_back(s); } for (size_t i = 0; i < query.aggregates.size(); ++i) { const QueryParseResult::Aggregate & aggregate = query.aggregates[i]; - std::string s = convertAggregateFunction(aggregate.attribute, aggregate.parameter, aggregate.function, query); + std::string s = convertAggregateFunction(aggregate.attribute, aggregate.parameter, aggregate.function, query, regions_point_of_view_formatted); if (query.key_attributes.size() + i > 0) out_query += ", "; - out_query += s + " AS _" + FirstWord(aggregate.function) + "_" + FirstWord(aggregate.attribute) + (aggregate.parameter ? "_" + toString(aggregate.parameter) : ""); + out_query += s + " AS _" + firstWord(aggregate.function) + "_" + firstWord(aggregate.attribute) + (aggregate.parameter ? "_" + toString(aggregate.parameter) : ""); selected_expressions.push_back(s); } @@ -115,7 +125,8 @@ void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, for (size_t i = 0; i < query.where_conditions.size(); ++i) { const QueryParseResult::WhereCondition & condition = query.where_conditions[i]; - out_query += " AND " + convertCondition(condition.attribute, condition.parameter, condition.relation, condition.rhs); + out_query += " AND " + convertCondition( + condition.attribute, condition.parameter, condition.relation, condition.rhs, regions_point_of_view_formatted); } /// Группировка. @@ -153,17 +164,24 @@ void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, out_query += " LIMIT " + toString(query.limit); } -std::string QueryConverter::convertAttributeFormatted(const std::string & attribute, unsigned parameter) +std::string QueryConverter::convertAttributeFormatted(const std::string & attribute, unsigned parameter, + const std::string & regions_point_of_view_formatted) const { if (formatted_attribute_map.count(attribute)) - return Poco::format(formatted_attribute_map[attribute], parameter); + return Poco::format(formatted_attribute_map.at(attribute), parameter); + /** Для атрибутов по регионам, выражение содержит подстановку %s, + * куда должна быть подставлена regions_point_of_view_formatted. + */ + if (regions_attributes_set.count(attribute)) + return Poco::format(numeric_attribute_map.at(attribute), regions_point_of_view_formatted); + if (numeric_attribute_map.count(attribute)) { - std::string numeric = Poco::format(numeric_attribute_map[attribute], parameter); + std::string numeric = Poco::format(numeric_attribute_map.at(attribute), parameter); if (formatting_aggregated_attribute_map.count(attribute)) - return Poco::format(formatting_aggregated_attribute_map[attribute], std::string("(") + numeric + ")"); + return Poco::format(formatting_aggregated_attribute_map.at(attribute), std::string("(") + numeric + ")"); else return numeric; } @@ -171,10 +189,17 @@ std::string QueryConverter::convertAttributeFormatted(const std::string & attrib throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER); } -std::string QueryConverter::convertAttributeNumeric(const std::string & attribute, unsigned parameter) +std::string QueryConverter::convertAttributeNumeric(const std::string & attribute, unsigned parameter, + const std::string & regions_point_of_view_formatted) const { + /** Для атрибутов по регионам, выражение содержит подстановку %s, + * куда должна быть подставлена regions_point_of_view_formatted. + */ + if (regions_attributes_set.count(attribute)) + return Poco::format(numeric_attribute_map.at(attribute), regions_point_of_view_formatted); + if (numeric_attribute_map.count(attribute)) - return Poco::format(numeric_attribute_map[attribute], parameter); + return Poco::format(numeric_attribute_map.at(attribute), parameter); throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER); } @@ -185,7 +210,7 @@ static bool StartsWith(const std::string & str, const std::string & prefix) } std::string QueryConverter::convertAggregateFunction(const std::string & attribute, unsigned parameter, const std::string & name, - const QueryParseResult & query) + const QueryParseResult & query, const std::string & regions_point_of_view_formatted) const { bool float_value = false; @@ -201,7 +226,7 @@ std::string QueryConverter::convertAggregateFunction(const std::string & attribu return "sum(Sign)"; } - std::string numeric = convertAttributeNumeric(attribute, parameter); + std::string numeric = convertAttributeNumeric(attribute, parameter, regions_point_of_view_formatted); if (name == "uniq" || name == "uniq_sort" || @@ -247,7 +272,7 @@ std::string QueryConverter::convertAggregateFunction(const std::string & attribu std::string format; if (formatting_aggregated_attribute_map.count(attribute)) { - format = formatting_aggregated_attribute_map[attribute]; + format = formatting_aggregated_attribute_map.at(attribute); trivial_format = false; } else @@ -305,16 +330,21 @@ std::string QueryConverter::convertAggregateFunction(const std::string & attribu return Poco::format(format, std::string() + (need_cast ? "toInt64" : "") + "(" + s + ")"); } -std::string QueryConverter::convertConstant(const std::string & attribute, const std::string & value) +std::string QueryConverter::convertConstant(const std::string & attribute, const std::string & value) const { if (!attribute_metadatas.count(attribute)) throw Exception("Unknown attribute " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER); - return toString(attribute_metadatas[attribute]->parse(value)); + return toString(attribute_metadatas.at(attribute)->parse(value)); } -std::string QueryConverter::convertCondition(const std::string & attribute, unsigned parameter, const std::string & name, const std::string & rhs) +std::string QueryConverter::convertCondition( + const std::string & attribute, + unsigned parameter, + const std::string & name, + const std::string & rhs, + const std::string & regions_point_of_view_formatted) const { - std::string value = convertAttributeNumeric(attribute, parameter); + std::string value = convertAttributeNumeric(attribute, parameter, regions_point_of_view_formatted); std::string constant = convertConstant(attribute, rhs); if (name == "equals") @@ -330,9 +360,9 @@ std::string QueryConverter::convertCondition(const std::string & attribute, unsi if (name == "greater_or_equals") return "(" + value + ")" + " >= " + constant; if (name == "region_in") - return "regionIn(" + value + ", toUInt32(" + constant + "))"; + return "regionIn(" + value + ", toUInt32(" + constant + ")" + regions_point_of_view_formatted + ")"; if (name == "region_not_in") - return "NOT regionIn(" + value + ", toUInt32(" + constant + "))"; + return "NOT regionIn(" + value + ", toUInt32(" + constant + ")" + regions_point_of_view_formatted + ")"; if (name == "os_in") return "OSIn(" + value + ", " + constant + ")"; if (name == "os_not_in") @@ -356,7 +386,7 @@ std::string QueryConverter::convertCondition(const std::string & attribute, unsi throw Exception("Unknown relation " + name, ErrorCodes::UNKNOWN_RELATION); } -std::string QueryConverter::convertSortDirection(const std::string & direction) +std::string QueryConverter::convertSortDirection(const std::string & direction) const { if (direction == "descending") return "DESC"; @@ -364,7 +394,7 @@ std::string QueryConverter::convertSortDirection(const std::string & direction) return "ASC"; } -std::string QueryConverter::convertDateRange(time_t date_first, time_t date_last) +std::string QueryConverter::convertDateRange(time_t date_first, time_t date_last) const { std::string first_str; std::string last_str; @@ -377,12 +407,12 @@ std::string QueryConverter::convertDateRange(time_t date_first, time_t date_last return "StartDate >= toDate('" + first_str + "') AND StartDate <= toDate('" + last_str + "')"; } -std::string QueryConverter::convertCounterID(CounterID_t CounterID) +std::string QueryConverter::convertCounterID(CounterID_t CounterID) const { return "CounterID == " + toString(CounterID); } -std::string QueryConverter::getTableName(CounterID_t CounterID, bool local) +std::string QueryConverter::getTableName(CounterID_t CounterID, bool local) const { if (CounterID == 0 && !local) return table_for_all_counters; @@ -390,7 +420,7 @@ std::string QueryConverter::getTableName(CounterID_t CounterID, bool local) return table_for_single_counter; } -std::string QueryConverter::getHavingSection() +std::string QueryConverter::getHavingSection() const { return "HAVING sum(Sign) > 0"; } @@ -449,13 +479,13 @@ void QueryConverter::fillNumericAttributeMap() M("StartURLHash", "NormalizedStartURLHash") M("StartURLDomainHash", "StartURLDomainHash") M("RegionID", "RegionID") - M("RegionCity", "regionToCity(RegionID)") - M("RegionArea", "regionToArea(RegionID)") - M("RegionCountry", "regionToCountry(RegionID)") + M("RegionCity", "regionToCity(RegionID%s)") + M("RegionArea", "regionToArea(RegionID%s)") + M("RegionCountry", "regionToCountry(RegionID%s)") M("URLRegionID", "URLRegions[0]") - M("URLRegionCity", "regionToCity(URLRegions[0])") - M("URLRegionArea", "regionToArea(URLRegions[0])") - M("URLRegionCountry", "regionToCountry(URLRegions[0])") + M("URLRegionCity", "regionToCity(URLRegions[0]%s)") + M("URLRegionArea", "regionToArea(URLRegions[0]%s)") + M("URLRegionCountry", "regionToCountry(URLRegions[0]%s)") M("URLCategoryID", "URLCategories[0]") M("URLCategoryMostAncestor", "categoryToRoot(URLCategories[0])") M("URLCategorySecondLevel", "categoryToSecondLevel(URLCategories[0])") diff --git a/dbms/src/Server/OLAPQueryConverter.h b/dbms/src/Server/OLAPQueryConverter.h index 2804507f852..9eea3a2bc4a 100644 --- a/dbms/src/Server/OLAPQueryConverter.h +++ b/dbms/src/Server/OLAPQueryConverter.h @@ -18,29 +18,41 @@ public: QueryConverter(Poco::Util::AbstractConfiguration & config); /// Получает из запроса в формате OLAP-server запрос и настройки для clickhouse. - void OLAPServerQueryToClickhouse(const QueryParseResult & query, Context & inout_context, std::string & out_query); + void OLAPServerQueryToClickhouse(const QueryParseResult & query, Context & inout_context, std::string & out_query) const; + private: /// Значение атрибута, подходящее для вывода в ответ и для группировки по нему. - std::string convertAttributeFormatted(const std::string & attribute, unsigned parameter); + std::string convertAttributeFormatted(const std::string & attribute, unsigned parameter, const std::string & regions_point_of_view_formatted) const; + /// Числовое значение атрибута, подходящее для подстановки в условия, агрегатные функции и ключи сортировки. - std::string convertAttributeNumeric(const std::string & attribute, unsigned parameter); + std::string convertAttributeNumeric(const std::string & attribute, unsigned parameter, const std::string & regions_point_of_view_formatted) const; /// => SELECT x std::string convertAggregateFunction(const std::string & attribute, unsigned parameter, const std::string & function, - const QueryParseResult & query); + const QueryParseResult & query, const std::string & regions_point_of_view_formatted) const; + /// => SELECT ... where F(A, x) - std::string convertConstant(const std::string & attribute, const std::string & value); + std::string convertConstant(const std::string & attribute, const std::string & value) const; + /// => SELECT ... WHERE x - std::string convertCondition(const std::string & attribute, unsigned parameter, const std::string & relation, const std::string & rhs); + std::string convertCondition( + const std::string & attribute, + unsigned parameter, + const std::string & relation, + const std::string & rhs, + const std::string & regions_point_of_view_formatted) const; + /// ASC или DESC - std::string convertSortDirection(const std::string & direction); + std::string convertSortDirection(const std::string & direction) const; + /// => SELECT ... WHERE x - std::string convertDateRange(time_t date_first, time_t date_last); + std::string convertDateRange(time_t date_first, time_t date_last) const; + /// => SELECT ... WHERE x - std::string convertCounterID(CounterID_t CounterID); + std::string convertCounterID(CounterID_t CounterID) const; - std::string getTableName(CounterID_t CounterID, bool local); - std::string getHavingSection(); + std::string getTableName(CounterID_t CounterID, bool local) const; + std::string getHavingSection() const; void fillFormattedAttributeMap(); void fillNumericAttributeMap(); @@ -51,10 +63,24 @@ private: /// Форматная строка для convertAttributeNumeric. Есть для всех атрибутов. std::map numeric_attribute_map; + /// Форматная строка для получения выводимого значения из агрегированного числового значения. std::map formatting_aggregated_attribute_map; + /// Форматная строка для convertAttributeFormatted. std::map formatted_attribute_map; + + /// Список атрибутов-регионов, для которых нужна передача параметра regions_point_of_view. + std::set regions_attributes_set = + { + "RegionCity", + "RegionArea", + "RegionCountry", + "URLRegionCity", + "URLRegionArea", + "URLRegionCountry" + }; + /// Парсеры значений атрибутов. AttributeMetadatas attribute_metadatas; }; diff --git a/dbms/src/Server/OLAPQueryParser.cpp b/dbms/src/Server/OLAPQueryParser.cpp index b0e997fb9ce..27dc221a4de 100644 --- a/dbms/src/Server/OLAPQueryParser.cpp +++ b/dbms/src/Server/OLAPQueryParser.cpp @@ -180,6 +180,10 @@ QueryParseResult QueryParser::parse(std::istream & s) if (result.sample <= 0 || result.sample > 1.) throw Exception(std::string("Wrong sample = ") + DB::toString(result.sample) + ". Sampling must be in range (0, 1]"); } + else if (settings_child_nodes->item(i)->nodeName() == "regions_point_of_view") + { + result.regions_point_of_view = settings_child_nodes->item(i)->innerText(); + } } } diff --git a/dbms/src/Server/OLAPQueryParser.h b/dbms/src/Server/OLAPQueryParser.h index 22abb12c289..0efb9fd412c 100644 --- a/dbms/src/Server/OLAPQueryParser.h +++ b/dbms/src/Server/OLAPQueryParser.h @@ -100,6 +100,9 @@ struct QueryParseResult std::vector aggregates; std::vector where_conditions; std::vector sort_columns; + + /// Какую иерархию регионов использовать. + std::string regions_point_of_view; };