2012-12-14 13:31:48 +00:00
|
|
|
#include "OLAPQueryConverter.h"
|
2012-12-17 15:48:24 +00:00
|
|
|
#include <DB/IO/WriteHelpers.h>
|
|
|
|
#include <DB/IO/WriteBufferFromString.h>
|
|
|
|
#include <Poco/NumberFormatter.h>
|
2012-12-14 13:31:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace OLAP
|
|
|
|
{
|
|
|
|
|
|
|
|
/** Reads the table names from the configuration and fills the attribute lookup
  * tables that the convert* methods consult.
  */
QueryConverter::QueryConverter(Poco::Util::AbstractConfiguration & config)
{
    /// Names of the tables to query; getTableName() picks one of them by CounterID.
    table_for_single_counter = config.getString("olap_table_for_single_counter");
    table_for_all_counters = config.getString("olap_table_for_all_counters");

    /// Attribute name -> expression template maps.
    fillFormattedAttributeMap();
    fillNumericAttributeMap();
    fillFormattingAggregatedAttributeMap();

    /// Metadata used by convertConstant() to parse the right-hand side of conditions.
    attribute_metadatas = GetOLAPAttributeMetadata();
}
|
|
|
|
|
|
|
|
/// Returns the leading run of ASCII letters of `s`, i.e. everything before the first
/// character that is not in [a-zA-Z]. Returns `s` unchanged if it consists of letters only.
static std::string FirstWord(std::string s)
{
    size_t letters = 0;
    while (letters < s.length()
        && ((s[letters] >= 'a' && s[letters] <= 'z') || (s[letters] >= 'A' && s[letters] <= 'Z')))
        ++letters;

    /// Shrinking drops the tail starting at the first non-letter.
    s.resize(letters);
    return s;
}
|
|
|
|
|
|
|
|
void QueryConverter::OLAPServerQueryToClickhouse(const QueryParseResult & query, Context & inout_context, std::string & out_query)
|
|
|
|
{
|
2012-12-17 15:48:24 +00:00
|
|
|
/// Проверим, умеем ли мы выполнять такой запрос.
|
|
|
|
if (query.is_list_of_visits_query)
|
|
|
|
throw Exception("List of visits queries not supported", ErrorCodes::UNSUPPORTED_PARAMETER);
|
|
|
|
if (query.format != FORMAT_TAB)
|
|
|
|
throw Exception("Only tab-separated output format is supported", ErrorCodes::UNSUPPORTED_PARAMETER);
|
|
|
|
|
|
|
|
/// Учтем некоторые настройки (пока далеко не все).
|
|
|
|
|
2012-12-14 13:31:48 +00:00
|
|
|
Settings new_settings = inout_context.getSettings();
|
|
|
|
|
|
|
|
if (query.concurrency != 0)
|
|
|
|
new_settings.max_threads = query.concurrency;
|
|
|
|
|
|
|
|
inout_context.setSettings(new_settings);
|
|
|
|
|
2012-12-17 15:48:24 +00:00
|
|
|
/// Составим запрос.
|
|
|
|
out_query = "SELECT ";
|
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
std::vector<std::string> selected_expressions;
|
|
|
|
|
2012-12-17 15:48:24 +00:00
|
|
|
/// Что выбирать: ключи агрегации и агрегированные значения.
|
|
|
|
for (size_t i = 0; i < query.key_attributes.size(); ++i)
|
|
|
|
{
|
|
|
|
const QueryParseResult::KeyAttribute & key = query.key_attributes[i];
|
2012-12-19 11:20:46 +00:00
|
|
|
std::string s = convertAttributeFormatted(key.attribute, key.parameter);
|
2012-12-17 15:48:24 +00:00
|
|
|
|
|
|
|
if (i > 0)
|
|
|
|
out_query += ", ";
|
2012-12-28 10:50:46 +00:00
|
|
|
out_query += s + " AS _" + FirstWord(key.attribute);
|
2012-12-19 11:20:46 +00:00
|
|
|
selected_expressions.push_back(s);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < query.aggregates.size(); ++i)
|
|
|
|
{
|
|
|
|
const QueryParseResult::Aggregate & aggregate = query.aggregates[i];
|
2012-12-19 11:20:46 +00:00
|
|
|
std::string s = convertAggregateFunction(aggregate.attribute, aggregate.parameter, aggregate.function);
|
2012-12-17 15:48:24 +00:00
|
|
|
|
2012-12-20 11:36:15 +00:00
|
|
|
if (query.key_attributes.size() + i > 0)
|
2012-12-17 15:48:24 +00:00
|
|
|
out_query += ", ";
|
2012-12-28 10:50:46 +00:00
|
|
|
out_query += s + " AS _" + FirstWord(aggregate.function) + "_" + FirstWord(aggregate.attribute);
|
2012-12-19 11:20:46 +00:00
|
|
|
selected_expressions.push_back(s);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Из какой таблицы.
|
|
|
|
out_query += " FROM " + getTableName(query.CounterID);
|
|
|
|
|
|
|
|
/// Условия.
|
|
|
|
out_query += " WHERE ";
|
|
|
|
|
|
|
|
/// Диапазон дат.
|
|
|
|
out_query += convertDateRange(query.date_first, query.date_last);
|
|
|
|
|
|
|
|
/// Счетчик.
|
|
|
|
if (query.CounterID != 0)
|
|
|
|
out_query += " AND " + convertCounterID(query.CounterID);
|
|
|
|
|
|
|
|
/// Произвольные условия.
|
|
|
|
for (size_t i = 0; i < query.where_conditions.size(); ++i)
|
|
|
|
{
|
|
|
|
const QueryParseResult::WhereCondition & condition = query.where_conditions[i];
|
|
|
|
out_query += " AND " + convertCondition(condition.attribute, condition.parameter, condition.relation, condition.rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Группировка.
|
|
|
|
if (!query.key_attributes.empty())
|
|
|
|
{
|
|
|
|
out_query += " GROUP BY ";
|
|
|
|
for (size_t i = 0; i < query.key_attributes.size(); ++i)
|
|
|
|
{
|
|
|
|
if (i > 0)
|
|
|
|
out_query += ", ";
|
2012-12-19 11:20:46 +00:00
|
|
|
out_query += selected_expressions[i];
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Условие для групп.
|
|
|
|
out_query += " " + getHavingSection();
|
|
|
|
|
|
|
|
/// Сортировка.
|
|
|
|
if (!query.sort_columns.empty())
|
|
|
|
{
|
|
|
|
out_query += " ORDER BY ";
|
|
|
|
for (size_t i = 0; i < query.sort_columns.size(); ++i)
|
|
|
|
{
|
|
|
|
const QueryParseResult::SortColumn & column = query.sort_columns[i];
|
|
|
|
|
|
|
|
if (i > 0)
|
|
|
|
out_query += ", ";
|
2012-12-19 11:20:46 +00:00
|
|
|
out_query += selected_expressions[column.index];
|
2012-12-17 15:48:24 +00:00
|
|
|
out_query += " " + convertSortDirection(column.direction);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Ограничение на количество выводимых строк.
|
|
|
|
if (query.limit != 0)
|
|
|
|
out_query += " LIMIT " + Poco::NumberFormatter::format(query.limit);
|
|
|
|
}
|
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
std::string QueryConverter::convertAttributeFormatted(const std::string & attribute, unsigned parameter)
|
2012-12-17 15:48:24 +00:00
|
|
|
{
|
2012-12-19 11:20:46 +00:00
|
|
|
if (formatted_attribute_map.count(attribute))
|
|
|
|
Poco::format(formatted_attribute_map[attribute], parameter);
|
|
|
|
|
|
|
|
if (numeric_attribute_map.count(attribute))
|
|
|
|
{
|
|
|
|
std::string numeric = Poco::format(numeric_attribute_map[attribute], parameter);
|
|
|
|
|
|
|
|
if (formatting_aggregated_attribute_map.count(attribute))
|
|
|
|
return Poco::format(formatting_aggregated_attribute_map[attribute], std::string("(") + numeric + ")");
|
|
|
|
else
|
|
|
|
return numeric;
|
|
|
|
}
|
|
|
|
|
|
|
|
throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
std::string QueryConverter::convertAttributeNumeric(const std::string & attribute, unsigned parameter)
|
2012-12-17 15:48:24 +00:00
|
|
|
{
|
2012-12-19 11:20:46 +00:00
|
|
|
if (numeric_attribute_map.count(attribute))
|
|
|
|
return Poco::format(numeric_attribute_map[attribute], parameter);
|
|
|
|
|
|
|
|
throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
/// Returns true if `str` begins with `prefix` (an empty prefix matches any string).
static bool StartsWith(const std::string & str, const std::string & prefix)
{
    if (prefix.length() > str.length())
        return false;

    /// Compare only the first prefix.length() characters of str.
    return str.compare(0, prefix.length(), prefix) == 0;
}
|
|
|
|
|
|
|
|
std::string QueryConverter::convertAggregateFunction(const std::string & attribute, unsigned parameter, const std::string & name)
|
2012-12-17 15:48:24 +00:00
|
|
|
{
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "count")
|
2012-12-28 10:50:46 +00:00
|
|
|
return "sum(Sign)";
|
2012-12-20 11:18:54 +00:00
|
|
|
|
|
|
|
std::string numeric = convertAttributeNumeric(attribute, parameter);
|
|
|
|
|
|
|
|
if (name == "uniq" ||
|
|
|
|
name == "uniq_sort" ||
|
|
|
|
name == "uniq_hash" ||
|
|
|
|
name == "uniq_approx" ||
|
|
|
|
name == "sequental_uniq" ||
|
|
|
|
StartsWith(name, "uniq_approx"))
|
|
|
|
return "uniq(" + numeric + ")";
|
|
|
|
|
|
|
|
if (name == "count_non_zero")
|
2012-12-28 11:27:02 +00:00
|
|
|
return "sum((" + numeric + ") == 0 ? toInt64(0) : toInt64(Sign))";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "count_non_minus_one")
|
2012-12-28 11:27:02 +00:00
|
|
|
return "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(Sign))";
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-28 11:44:53 +00:00
|
|
|
bool trivial_format;
|
|
|
|
|
2012-12-20 11:32:52 +00:00
|
|
|
std::string format;
|
|
|
|
if (formatting_aggregated_attribute_map.count(attribute))
|
2012-12-28 11:44:53 +00:00
|
|
|
{
|
2012-12-20 11:32:52 +00:00
|
|
|
format = formatting_aggregated_attribute_map[attribute];
|
2012-12-28 11:44:53 +00:00
|
|
|
trivial_format = false;
|
|
|
|
}
|
2012-12-20 11:32:52 +00:00
|
|
|
else
|
2012-12-28 11:44:53 +00:00
|
|
|
{
|
2012-12-20 11:32:52 +00:00
|
|
|
format = "%s";
|
2012-12-28 11:44:53 +00:00
|
|
|
trivial_format = true;
|
|
|
|
}
|
2012-12-20 11:32:52 +00:00
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
std::string s;
|
2012-12-28 11:44:53 +00:00
|
|
|
bool float_value = false;
|
2012-12-20 11:18:54 +00:00
|
|
|
|
|
|
|
if (name == "sum")
|
2012-12-28 10:50:46 +00:00
|
|
|
s = "sum((" + numeric + ") * Sign)";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "sum_non_minus_one")
|
2012-12-28 11:27:02 +00:00
|
|
|
s = "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(" + numeric + ") * Sign)";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "avg")
|
2012-12-28 11:44:53 +00:00
|
|
|
{
|
2012-12-28 10:50:46 +00:00
|
|
|
s = "sum((" + numeric + ") * Sign) / sum(Sign)";
|
2012-12-28 11:44:53 +00:00
|
|
|
float_value = true;
|
|
|
|
}
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "avg_non_zero")
|
2012-12-28 11:44:53 +00:00
|
|
|
{
|
2012-12-28 11:27:02 +00:00
|
|
|
s = "sum((" + numeric + ") * Sign) / sum((" + numeric + ") == 0 ? toInt64(0) : toInt64(Sign))";
|
2012-12-28 11:44:53 +00:00
|
|
|
float_value = true;
|
|
|
|
}
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "avg_non_minus_one")
|
2012-12-28 11:44:53 +00:00
|
|
|
{
|
2012-12-28 11:27:02 +00:00
|
|
|
s = "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(" + numeric + ") * Sign) / sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(Sign))";
|
2012-12-28 11:44:53 +00:00
|
|
|
float_value = true;
|
|
|
|
}
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "min")
|
|
|
|
s = "min(" + numeric + ")";
|
|
|
|
if (name == "max")
|
|
|
|
s = "max(" + numeric + ")";
|
|
|
|
|
2012-12-28 11:44:53 +00:00
|
|
|
/// Если агрегатная функция возвращает дробное число, и атрибут имеет нетривиальное форматирование, после агрегации приведем дробное число к целому.
|
|
|
|
bool need_cast = !trivial_format && float_value;
|
|
|
|
|
|
|
|
return Poco::format(format, std::string() + (need_cast ? "toInt64" : "") + "(" + s + ")");
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string QueryConverter::convertConstant(const std::string & attribute, const std::string & value)
|
|
|
|
{
|
2012-12-20 11:18:54 +00:00
|
|
|
if (!attribute_metadatas.count(attribute))
|
|
|
|
throw Exception("Unknown attribute " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
|
|
return Poco::NumberFormatter::format(attribute_metadatas[attribute]->parse(value));
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
std::string QueryConverter::convertCondition(const std::string & attribute, unsigned parameter, const std::string & name, const std::string & rhs)
|
2012-12-17 15:48:24 +00:00
|
|
|
{
|
2012-12-20 11:18:54 +00:00
|
|
|
std::string value = convertAttributeNumeric(attribute, parameter);
|
|
|
|
std::string constant = convertConstant(attribute, rhs);
|
|
|
|
|
|
|
|
if (name == "equals")
|
|
|
|
return "(" + value + ")" + " == " + constant;
|
|
|
|
if (name == "not_equals")
|
|
|
|
return "(" + value + ")" + " != " + constant;
|
|
|
|
if (name == "less")
|
|
|
|
return "(" + value + ")" + " < " + constant;
|
|
|
|
if (name == "greater")
|
|
|
|
return "(" + value + ")" + " > " + constant;
|
|
|
|
if (name == "less_or_equals")
|
|
|
|
return "(" + value + ")" + " <= " + constant;
|
|
|
|
if (name == "greater_or_equals")
|
|
|
|
return "(" + value + ")" + " >= " + constant;
|
|
|
|
if (name == "region_in")
|
|
|
|
return "regionIn(" + value + ", " + constant + ")";
|
|
|
|
if (name == "region_not_in")
|
|
|
|
return "NOT regionIn(" + value + ", " + constant + ")";
|
|
|
|
if (name == "os_in")
|
2012-12-24 08:15:18 +00:00
|
|
|
return "OSIn(" + value + ", " + constant + ")";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "os_not_in")
|
2012-12-24 08:15:18 +00:00
|
|
|
return "NOT OSIn(" + value + ", " + constant + ")";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "se_in")
|
2012-12-24 08:15:18 +00:00
|
|
|
return "SEIn(toUInt8(" + value + "), toUInt8(" + constant + "))";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "se_not_in")
|
2012-12-24 08:15:18 +00:00
|
|
|
return "NOT SEIn(toUInt8(" + value + "), toUInt8(" + constant + "))";
|
2012-12-20 11:18:54 +00:00
|
|
|
if (name == "interest_has_all_from")
|
|
|
|
return "bitwiseAnd(" + value + ", " + constant + ") == " + constant;
|
|
|
|
if (name == "interest_not_has_all_from")
|
|
|
|
return "bitwiseAnd(" + value + ", " + constant + ") != " + constant;
|
|
|
|
if (name == "interest_has_any_from")
|
|
|
|
return "bitwiseAnd(" + value + ", " + constant + ") != 0";
|
|
|
|
if (name == "interest_not_has_any_from")
|
|
|
|
return "bitwiseAnd(" + value + ", " + constant + ") == 0";
|
|
|
|
throw Exception("Unknown relation " + name, ErrorCodes::UNKNOWN_RELATION);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Maps the sort direction of the OLAP query to the ORDER BY keyword:
/// "descending" gives DESC, anything else gives ASC.
std::string QueryConverter::convertSortDirection(const std::string & direction)
{
    return direction == "descending" ? "DESC" : "ASC";
}
|
|
|
|
|
|
|
|
std::string QueryConverter::convertDateRange(time_t date_first, time_t date_last)
|
|
|
|
{
|
|
|
|
std::string first_str;
|
|
|
|
std::string last_str;
|
|
|
|
{
|
|
|
|
WriteBufferFromString first_buf(first_str);
|
|
|
|
WriteBufferFromString last_buf(last_str);
|
|
|
|
writeDateText(Yandex::DateLUTSingleton::instance().toDayNum(date_first), first_buf);
|
|
|
|
writeDateText(Yandex::DateLUTSingleton::instance().toDayNum(date_last), last_buf);
|
|
|
|
}
|
2012-12-20 11:39:56 +00:00
|
|
|
return "StartDate >= toDate('" + first_str + "') AND StartDate <= toDate('" + last_str + "')";
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string QueryConverter::convertCounterID(Yandex::CounterID_t CounterID)
|
|
|
|
{
|
2012-12-19 11:20:46 +00:00
|
|
|
return "CounterID == " + Poco::NumberFormatter::format(CounterID);
|
2012-12-17 15:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string QueryConverter::getTableName(Yandex::CounterID_t CounterID)
|
|
|
|
{
|
|
|
|
if (CounterID == 0)
|
|
|
|
return table_for_all_counters;
|
|
|
|
else
|
|
|
|
return table_for_single_counter;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the HAVING section appended to every query: only groups with a positive sum of Sign are kept.
std::string QueryConverter::getHavingSection()
{
    static const char * const having_section = "HAVING sum(Sign) > 0";
    return having_section;
}
|
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
void QueryConverter::fillNumericAttributeMap()
|
2012-12-17 15:48:24 +00:00
|
|
|
{
|
2012-12-19 11:20:46 +00:00
|
|
|
#define M(a, b) numeric_attribute_map[a] = b;
|
2012-12-17 15:48:24 +00:00
|
|
|
M("Dummy", "0")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("VisitStartDateTime", "toUInt32(StartTime)")
|
|
|
|
M("VisitStartDate", "toUInt32(toDateTime(StartDate))")
|
|
|
|
M("VisitStartWeek", "toUInt32(toDateTime(toMonday(StartDate)))")
|
|
|
|
M("VisitStartTime", "toUInt32(toTime(StartTime))")
|
|
|
|
|
|
|
|
M("VisitStartYear", "toYear(StartDate)")
|
|
|
|
M("VisitStartMonth", "toMonth(StartDate)")
|
|
|
|
M("VisitStartDayOfWeek", "toDayOfWeek(StartDate)")
|
|
|
|
M("VisitStartDayOfMonth", "toDayOfMonth(StartDate)")
|
|
|
|
M("VisitStartHour", "toHour(StartTime)")
|
|
|
|
M("VisitStartMinute", "toMinute(StartTime)")
|
|
|
|
M("VisitStartSecond", "toSecond(StartTime)")
|
|
|
|
|
|
|
|
M("FirstVisitDateTime", "toUInt32(FirstVisit)")
|
|
|
|
M("FirstVisitDate", "toUInt32(toDateTime(toDate(FirstVisit)))")
|
|
|
|
M("FirstVisitWeek", "toUInt32(toDateTime(toMonday(FirstVisit)))")
|
|
|
|
M("FirstVisitTime", "toUInt32(toTime(FirstVisit))")
|
|
|
|
|
|
|
|
M("FirstVisitYear", "toYear(FirstVisit)")
|
|
|
|
M("FirstVisitMonth", "toMonth(FirstVisit)")
|
|
|
|
M("FirstVisitDayOfWeek", "toDayOfWeek(FirstVisit)")
|
|
|
|
M("FirstVisitDayOfMonth", "toDayOfMonth(FirstVisit)")
|
|
|
|
M("FirstVisitHour", "toHour(FirstVisit)")
|
|
|
|
M("FirstVisitMinute", "toMinute(FirstVisit)")
|
|
|
|
M("FirstVisitSecond", "toSecond(FirstVisit)")
|
|
|
|
|
|
|
|
M("PredLastVisitDate", "toUInt32(toDateTime(PredLastVisit))")
|
|
|
|
M("PredLastVisitWeek", "toUInt32(toDateTime(toMonday(PredLastVisit)))")
|
|
|
|
M("PredLastVisitYear", "toYear(PredLastVisit)")
|
|
|
|
M("PredLastVisitMonth", "toMonth(PredLastVisit)")
|
|
|
|
M("PredLastVisitDayOfWeek","toDayOfWeek(PredLastVisit)")
|
|
|
|
M("PredLastVisitDayOfMonth","toDayOfMonth(PredLastVisit)")
|
|
|
|
|
|
|
|
M("ClientDateTime", "toUInt32(ClientEventTime)")
|
|
|
|
M("ClientTime", "toUInt32(toTime(ClientEventTime))")
|
|
|
|
M("ClientTimeHour", "toHour(ClientEventTime)")
|
|
|
|
M("ClientTimeMinute", "toMinute(ClientEventTime)")
|
|
|
|
M("ClientTimeSecond", "toSecond(ClientEventTime)")
|
|
|
|
|
|
|
|
M("EndURLHash", "halfMD5(EndURL)")
|
|
|
|
M("RefererHash", "halfMD5(Referer)")
|
|
|
|
M("SearchPhraseHash", "halfMD5(SearchPhrase)")
|
|
|
|
M("RefererDomainHash", "halfMD5(domainWithoutWWW(Referer))")
|
|
|
|
M("StartURLHash", "halfMD5(StartURL)")
|
|
|
|
M("StartURLDomainHash", "halfMD5(domainWithoutWWW(StartURL))")
|
|
|
|
M("RegionID", "RegionID")
|
|
|
|
M("RegionCity", "regionToCity(RegionID)")
|
|
|
|
M("RegionArea", "regionToArea(RegionID)")
|
|
|
|
M("RegionCountry", "regionToCountry(RegionID)")
|
|
|
|
M("TraficSourceID", "TraficSourceID")
|
|
|
|
M("IsNewUser", "FirstVisit == StartTime")
|
|
|
|
M("UserNewness", "intDiv(toUInt64(StartTime)-toUInt64(FirstVisit), 86400)")
|
|
|
|
M("UserNewnessInterval", "roundToExp2(intDiv(toUInt64(StartTime)-toUInt64(FirstVisit), 86400))")
|
|
|
|
M("UserReturnTime", "toUInt32(toDate(StartTime))-toUInt32(PredLastVisit)")
|
|
|
|
M("UserReturnTimeInterval","roundToExp2(toUInt32(toDate(StartTime))-toUInt32(PredLastVisit))")
|
|
|
|
M("UserVisitsPeriod", "(TotalVisits <= 1 ? toUInt16(0) : toUInt16((toUInt64(StartTime)-toUInt64(FirstVisit)) / (86400 * (TotalVisits - 1))))")
|
|
|
|
M("UserVisitsPeriodInterval","(TotalVisits <= 1 ? toUInt16(0) : roundToExp2(toUInt16((toUInt64(StartTime)-toUInt64(FirstVisit)) / (86400 * (TotalVisits - 1)))))")
|
|
|
|
M("VisitTime", "Duration")
|
|
|
|
M("VisitTimeInterval", "roundDuration(Duration)")
|
|
|
|
M("PageViews", "PageViews")
|
|
|
|
M("PageViewsInterval", "roundToExp2(PageViews)")
|
|
|
|
M("Bounce", "PageViews <= 1")
|
|
|
|
M("BouncePrecise", "IsBounce")
|
|
|
|
M("IsYandex", "IsYandex")
|
|
|
|
M("UserID", "UserID")
|
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("UserIDCreateDateTime", "(UserID > 10000000000000000000 OR UserID %% 10000000000 > 2000000000 OR UserID %% 10000000000 < 1000000000 ? toUInt64(0) : UserID %% 10000000000)")
|
|
|
|
M("UserIDCreateDate", "(UserID > 10000000000000000000 OR UserID %% 10000000000 > 2000000000 OR UserID %% 10000000000 < 1000000000 ? toUInt64(0) : UserID %% 10000000000)")
|
2012-12-19 11:20:46 +00:00
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("UserIDAge", "(UserID > 10000000000000000000 OR UserID %% 10000000000 < 1000000000 OR UserID %% 10000000000 > toUInt64(StartTime) ? toInt64(-1) : intDiv(toInt64(StartTime) - UserID %% 10000000000, 86400))")
|
|
|
|
M("UserIDAgeInterval", "(UserID > 10000000000000000000 OR UserID %% 10000000000 < 1000000000 OR UserID %% 10000000000 > toUInt64(StartTime) ? toInt64(-1) : toInt64(roundToExp2(intDiv(toUInt64(StartTime) - UserID %% 10000000000, 86400))))")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("TotalVisits", "TotalVisits")
|
|
|
|
M("TotalVisitsInterval", "roundToExp2(TotalVisits)")
|
|
|
|
M("Age", "Age")
|
|
|
|
M("AgeInterval", "roundAge(Age)")
|
|
|
|
M("Sex", "Sex")
|
|
|
|
M("Income", "Income")
|
|
|
|
M("AdvEngineID", "AdvEngineID")
|
|
|
|
|
|
|
|
M("DotNet", "NetMajor * 256 + NetMinor")
|
|
|
|
|
|
|
|
M("DotNetMajor", "NetMajor")
|
|
|
|
|
|
|
|
M("Flash", "FlashMajor * 256 + FlashMinor")
|
|
|
|
|
|
|
|
M("FlashExists", "FlashMajor > 0")
|
|
|
|
M("FlashMajor", "FlashMajor")
|
|
|
|
|
|
|
|
M("Silverlight", "SilverlightVersion1 * 72057594037927936 + SilverlightVersion2 * 281474976710656 + SilverlightVersion3 * 65536 + SilverlightVersion4")
|
|
|
|
|
|
|
|
M("SilverlightMajor", "SilverlightVersion1")
|
|
|
|
M("Hits", "Hits")
|
|
|
|
M("HitsInterval", "roundToExp2(Hits)")
|
|
|
|
M("JavaEnable", "JavaEnable")
|
|
|
|
M("CookieEnable", "CookieEnable")
|
|
|
|
M("JavascriptEnable", "JavascriptEnable")
|
|
|
|
M("IsMobile", "IsMobile")
|
|
|
|
M("MobilePhoneID", "MobilePhone")
|
|
|
|
M("MobilePhoneModelHash", "halfMD5(MobilePhoneModel)")
|
|
|
|
|
|
|
|
M("MobilePhoneModel", "reinterpretAsUInt64(MobilePhoneModel)")
|
|
|
|
M("BrowserLanguage", "BrowserLanguage")
|
|
|
|
M("BrowserCountry", "BrowserCountry")
|
|
|
|
M("TopLevelDomain", "reinterpretAsUInt64(topLevelDomain(StartURL))")
|
|
|
|
M("URLScheme", "reinterpretAsUInt64(protocol(StartURL))")
|
|
|
|
|
|
|
|
M("IPNetworkID", "IPNetworkID")
|
|
|
|
M("ClientTimeZone", "ClientTimeZone")
|
|
|
|
M("OSID", "OS")
|
2012-12-24 08:15:18 +00:00
|
|
|
M("OSMostAncestor", "OSToRoot(OS)")
|
2012-12-19 11:20:46 +00:00
|
|
|
|
|
|
|
M("ClientIP", "ClientIP")
|
|
|
|
M("Resolution", "ResolutionWidth * 16777216 + ResolutionHeight * 256 + ResolutionDepth")
|
|
|
|
M("ResolutionWidthHeight","ResolutionWidth * 65536 + ResolutionHeight")
|
|
|
|
|
|
|
|
M("ResolutionWidth", "ResolutionWidth")
|
|
|
|
M("ResolutionHeight", "ResolutionHeight")
|
|
|
|
M("ResolutionWidthInterval","intDiv(ResolutionWidth, 100) * 100")
|
|
|
|
M("ResolutionHeightInterval","intDiv(ResolutionHeight, 100) * 100")
|
|
|
|
M("ResolutionColor", "ResolutionDepth")
|
|
|
|
|
|
|
|
M("WindowClientArea", "WindowClientWidth * 65536 + WindowClientHeight")
|
|
|
|
|
|
|
|
M("WindowClientAreaInterval","intDiv(WindowClientWidth, 100) * 6553600 + intDiv(WindowClientHeight, 100) * 100")
|
|
|
|
M("WindowClientWidth", "WindowClientWidth")
|
|
|
|
M("WindowClientWidthInterval","intDiv(WindowClientWidth, 100) * 100")
|
|
|
|
M("WindowClientHeight", "WindowClientHeight")
|
|
|
|
M("WindowClientHeightInterval","intDiv(WindowClientHeight, 100) * 100")
|
|
|
|
M("SearchEngineID", "SearchEngineID")
|
2012-12-24 08:15:18 +00:00
|
|
|
M("SEMostAncestor", "SEToRoot(toUInt8(SearchEngineID))")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("CodeVersion", "CodeVersion")
|
|
|
|
|
|
|
|
M("UserAgent", "UserAgent * 16777216 + UserAgentMajor * 65536 + UserAgentMinor")
|
|
|
|
M("UserAgentVersion", "UserAgentMajor * 65536 + UserAgentMinor")
|
|
|
|
M("UserAgentMajor", "UserAgent * 256 + UserAgentMajor")
|
|
|
|
|
|
|
|
M("UserAgentID", "UserAgent")
|
|
|
|
M("ClickGoodEvent", "ClickGoodEvent")
|
|
|
|
M("ClickPriorityID", "ClickPriorityID")
|
|
|
|
M("ClickBannerID", "ClickBannerID")
|
|
|
|
M("ClickPhraseID", "ClickPhraseID")
|
|
|
|
M("ClickPageID", "ClickPageID")
|
|
|
|
M("ClickPlaceID", "ClickPlaceID")
|
|
|
|
M("ClickTypeID", "ClickTypeID")
|
|
|
|
M("ClickResourceID", "ClickResourceID")
|
|
|
|
M("ClickDomainID", "ClickDomainID")
|
|
|
|
M("ClickCost", "ClickCost")
|
|
|
|
M("ClickURLHash", "halfMD5(ClickURL)")
|
|
|
|
M("ClickOrderID", "ClickOrderID")
|
2012-12-27 14:54:35 +00:00
|
|
|
M("GoalReachesAny", "GoalReachesAny")
|
2012-12-26 17:13:23 +00:00
|
|
|
M("GoalReachesDepth", "GoalReachesDepth")
|
|
|
|
M("GoalReachesURL", "GoalReachesURL")
|
2012-12-28 11:29:01 +00:00
|
|
|
M("ConvertedAny", "(GoalReachesAny > 1 ? toInt32(1) : GoalReachesAny)")
|
|
|
|
M("ConvertedDepth", "(GoalReachesDepth > 1 ? toInt32(1) : GoalReachesDepth)")
|
|
|
|
M("ConvertedURL", "(GoalReachesURL > 1 ? toInt32(1) : GoalReachesURL)")
|
2012-12-28 11:30:58 +00:00
|
|
|
M("GoalReaches", "countEqual(GoalsReached, toUInt32(%u))")
|
|
|
|
M("Converted", "has(GoalsReached, toUInt32(%u))")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("CounterID", "CounterID")
|
|
|
|
M("VisitID", "VisitID")
|
|
|
|
|
|
|
|
M("Interests", "Interests")
|
|
|
|
|
|
|
|
M("HasInterestPhoto", "modulo(intDiv(Interests, 128), 2)")
|
|
|
|
M("HasInterestMoviePremieres","modulo(intDiv(Interests, 64), 2)")
|
|
|
|
M("HasInterestTourism", "modulo(intDiv(Interests, 32), 2)")
|
|
|
|
M("HasInterestFamilyAndChildren","modulo(intDiv(Interests, 16), 2)")
|
|
|
|
M("HasInterestFinance", "modulo(intDiv(Interests, 8), 2)")
|
|
|
|
M("HasInterestB2B", "modulo(intDiv(Interests, 4), 2)")
|
|
|
|
M("HasInterestCars", "modulo(intDiv(Interests, 2), 2)")
|
|
|
|
M("HasInterestMobileAndInternetCommunications","modulo(Interests, 2)")
|
|
|
|
M("HasInterestBuilding", "modulo(intDiv(Interests, 256), 2)")
|
|
|
|
M("HasInterestCulinary", "modulo(intDiv(Interests, 512), 2)")
|
|
|
|
M("OpenstatServiceNameHash","halfMD5(OpenstatServiceName)")
|
|
|
|
M("OpenstatCampaignIDHash","halfMD5(OpenstatCampaignID)")
|
|
|
|
M("OpenstatAdIDHash", "halfMD5(OpenstatAdID)")
|
|
|
|
M("OpenstatSourceIDHash", "halfMD5(OpenstatSourceID)")
|
|
|
|
M("UTMSourceHash", "halfMD5(UTMSource)")
|
|
|
|
M("UTMMediumHash", "halfMD5(UTMMedium)")
|
|
|
|
M("UTMCampaignHash", "halfMD5(UTMCampaign)")
|
|
|
|
M("UTMContentHash", "halfMD5(UTMContent)")
|
|
|
|
M("UTMTermHash", "halfMD5(UTMTerm)")
|
|
|
|
M("FromHash", "halfMD5(FromTag)")
|
|
|
|
M("CLID", "CLID")
|
|
|
|
#undef M
|
|
|
|
}
|
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
void QueryConverter::fillFormattedAttributeMap()
|
2012-12-19 11:20:46 +00:00
|
|
|
{
|
2012-12-20 11:18:54 +00:00
|
|
|
#define M(a, b) formatted_attribute_map[a] = b;
|
2012-12-17 15:48:24 +00:00
|
|
|
M("VisitStartDateTime", "StartTime")
|
|
|
|
M("VisitStartDate", "StartDate")
|
|
|
|
M("VisitStartWeek", "toMonday(StartDate)")
|
|
|
|
M("VisitStartTime", "substring(toString(StartTime), 12, 8)")
|
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
M("FirstVisitDateTime", "FirstVisit")
|
|
|
|
M("FirstVisitDate", "toDate(FirstVisit)")
|
|
|
|
M("FirstVisitWeek", "toMonday(FirstVisit)")
|
|
|
|
M("FirstVisitTime", "substring(toString(FirstVisit), 12, 8)")
|
|
|
|
|
|
|
|
M("PredLastVisitDate", "PredLastVisit")
|
|
|
|
M("PredLastVisitWeek", "toMonday(PredLastVisit)")
|
|
|
|
|
|
|
|
M("ClientDateTime", "ClientEventTime")
|
|
|
|
M("ClientTime", "substring(toString(ClientEventTime), 12, 8)")
|
|
|
|
|
|
|
|
M("DotNet", "concat(concat(toString(NetMajor), '.'), toString(NetMinor))")
|
|
|
|
M("Flash", "concat(concat(toString(FlashMajor),'.'),toString(FlashMinor))")
|
|
|
|
M("Silverlight", "concat(concat(concat(concat(concat(concat(toString(SilverlightVersion1), '.'), toString(SilverlightVersion2)), '.'), toString(SilverlightVersion3)), '.'), toString(SilverlightVersion4))")
|
|
|
|
M("MobilePhoneModel", "MobilePhoneModel")
|
|
|
|
M("TopLevelDomain", "topLevelDomain(StartURL)")
|
|
|
|
M("URLScheme", "protocol(StartURL)")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("ClientIP", "concat(concat(concat(concat(concat(concat(toString(intDiv(ClientIP, 16777216)),'.'),toString(intDiv(ClientIP, 65536) %% 256)),'.'),toString(intDiv(ClientIP, 256) %% 256)),'.'),toString(ClientIP %% 256))")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("Resolution", "concat(concat(concat(concat(toString(ResolutionWidth),'x'),toString(ResolutionHeight)),'x'),toString(ResolutionDepth))")
|
|
|
|
M("ResolutionWidthHeight","concat(concat(toString(ResolutionWidth),'x'),toString(ResolutionHeight))")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-19 11:20:46 +00:00
|
|
|
M("WindowClientArea", "concat(concat(toString(WindowClientWidth),'x'),toString(WindowClientHeight))")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-26 15:03:27 +00:00
|
|
|
M("UserAgent", "concat(concat(concat(toString(UserAgent), ' '), toString(UserAgentMajor)), UserAgentMinor == 0 ? '' : concat('.', reinterpretAsString(UserAgentMinor)))")
|
|
|
|
M("UserAgentVersion", "concat(toString(UserAgentMajor), UserAgentMinor == 0 ? '' : concat('.', reinterpretAsString(UserAgentMinor)))")
|
2012-12-19 11:20:46 +00:00
|
|
|
M("UserAgentMajor", "concat(concat(toString(UserAgent), ' '), toString(UserAgentMajor))")
|
|
|
|
#undef M
|
|
|
|
}
|
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
void QueryConverter::fillFormattingAggregatedAttributeMap()
|
2012-12-19 11:20:46 +00:00
|
|
|
{
|
2012-12-20 11:18:54 +00:00
|
|
|
#define M(a, b) formatting_aggregated_attribute_map[a] = b;
|
2012-12-19 11:20:46 +00:00
|
|
|
std::string todate = "toDate(toDateTime(%s))";
|
|
|
|
std::string todatetime = "toDateTime(%s)";
|
|
|
|
std::string cuttime = "substring(toString(toDateTime(%s)), 12, 8)";
|
2012-12-26 15:03:27 +00:00
|
|
|
std::string tostring = "reinterpretAsString(%s)";
|
2012-12-19 11:20:46 +00:00
|
|
|
|
|
|
|
M("VisitStartDateTime", todatetime)
|
|
|
|
M("VisitStartDate", todate)
|
|
|
|
M("VisitStartWeek", todate)
|
|
|
|
M("VisitStartTime", cuttime)
|
|
|
|
|
|
|
|
M("FirstVisitDateTime", todatetime)
|
|
|
|
M("FirstVisitDate", todate)
|
|
|
|
M("FirstVisitWeek", todate)
|
|
|
|
M("FirstVisitTime", cuttime)
|
|
|
|
|
|
|
|
M("PredLastVisitDate", todate)
|
|
|
|
M("PredLastVisitWeek", todate)
|
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
M("ClientDateTime", todatetime)
|
|
|
|
M("ClientTime", cuttime)
|
2012-12-17 15:48:24 +00:00
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
M("UserIDCreateDateTime", todatetime)
|
|
|
|
M("UserIDCreateDate", todate)
|
2012-12-17 15:48:24 +00:00
|
|
|
|
2012-12-20 11:18:54 +00:00
|
|
|
M("DotNet", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), '.'), toString(modulo(toUInt32(%[0]s), 256)))")
|
|
|
|
|
|
|
|
M("Flash", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), '.'), toString(modulo(toUInt32(%[0]s), 256)))")
|
|
|
|
|
|
|
|
M("Silverlight", "concat(concat(concat(concat(concat(concat(toString(intDiv(toUInt64(%[0]s), 72057594037927936)), '.'), toString(modulo(intDiv(toUInt64(%[0]s), 281474976710656), 256))), '.'), toString(modulo(intDiv(toUInt64(%[0]s), 65536), 4294967296))), '.'), toString(modulo(toUInt64(%[0]s), 65536)))")
|
|
|
|
|
|
|
|
M("MobilePhoneModel", tostring)
|
|
|
|
M("BrowserLanguage", tostring)
|
|
|
|
M("BrowserCountry", tostring)
|
|
|
|
M("TopLevelDomain", tostring)
|
|
|
|
M("URLScheme", tostring)
|
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("ClientIP", "concat(concat(concat(concat(concat(concat(toString(intDiv(toUInt32(%[0]s), 16777216)),'.'),toString(intDiv(toUInt32(%[0]s), 65536) %% 256)),'.'),toString(intDiv(toUInt32(%[0]s), 256) %% 256)),'.'),toString(toUInt32(%[0]s) %% 256))")
|
|
|
|
M("Resolution", "concat(concat(concat(concat(toString(intDiv(toUInt64(%[0]s), 16777216)),'x'),toString(intDiv(toUInt64(%[0]s), 256) %% 65536)),'x'),toString(toUInt64(%[0]s) %% 256))")
|
|
|
|
M("ResolutionWidthHeight","concat(concat(toString(intDiv(toUInt64(%[0]s), 65536)),'x'),toString(toUInt64(%[0]s) %% 65536))")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("WindowClientArea", "concat(concat(toString(intDiv(toUInt64(%[0]s), 65536)),'x'),toString(toUInt64(%[0]s) %% 65536))")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
2012-12-28 11:15:10 +00:00
|
|
|
M("UserAgent", "concat(concat(concat(toString(intDiv(toUInt32(%[0]s), 16777216)), ' '), toString(intDiv(toUInt32(%[0]s), 65536) %% 256)), (toUInt32(%[0]s) %% 65536) == 0 ? '' : concat('.', reinterpretAsString(toUInt32(%[0]s) %% 65536)))")
|
|
|
|
M("UserAgentVersion", "concat(toString(intDiv(toUInt32(%[0]s), 65536)), (toUInt32(%[0]s) %% 65536) == 0 ? '' : concat('.', reinterpretAsString(toUInt32(%[0]s) %% 65536)))")
|
|
|
|
M("UserAgentMajor", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), ' '), toString(toUInt32(%[0]s) %% 256))")
|
2012-12-20 11:18:54 +00:00
|
|
|
|
|
|
|
M("Interests", "bitmaskToList(%s)")
|
2012-12-17 15:48:24 +00:00
|
|
|
#undef M
|
2012-12-14 13:31:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|