2018-11-25 23:24:26 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <common/find_symbols.h>
|
2019-06-20 09:12:49 +00:00
|
|
|
#include "domain.h"
|
|
|
|
#include "tldLookup.h"
|
2018-11-25 23:24:26 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
struct ExtractFirstSignificantSubdomain
|
|
|
|
{
|
|
|
|
static size_t getReserveLengthForElement() { return 10; }
|
|
|
|
|
|
|
|
static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
|
|
|
|
{
|
|
|
|
res_data = data;
|
|
|
|
res_size = 0;
|
|
|
|
|
|
|
|
Pos tmp;
|
|
|
|
size_t domain_length;
|
|
|
|
ExtractDomain<true>::execute(data, size, tmp, domain_length);
|
|
|
|
|
|
|
|
if (domain_length == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (out_domain_end)
|
|
|
|
*out_domain_end = tmp + domain_length;
|
|
|
|
|
|
|
|
/// cut useless dot
|
|
|
|
if (tmp[domain_length - 1] == '.')
|
|
|
|
--domain_length;
|
|
|
|
|
|
|
|
res_data = tmp;
|
|
|
|
res_size = domain_length;
|
|
|
|
|
|
|
|
auto begin = tmp;
|
|
|
|
auto end = begin + domain_length;
|
|
|
|
const char * last_3_periods[3]{};
|
|
|
|
|
|
|
|
auto pos = find_first_symbols<'.'>(begin, end);
|
|
|
|
while (pos < end)
|
|
|
|
{
|
|
|
|
last_3_periods[2] = last_3_periods[1];
|
|
|
|
last_3_periods[1] = last_3_periods[0];
|
|
|
|
last_3_periods[0] = pos;
|
|
|
|
pos = find_first_symbols<'.'>(pos + 1, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!last_3_periods[0])
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!last_3_periods[1])
|
|
|
|
{
|
|
|
|
res_size = last_3_periods[0] - begin;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!last_3_periods[2])
|
|
|
|
last_3_periods[2] = begin - 1;
|
|
|
|
|
2019-04-30 06:50:17 +00:00
|
|
|
auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end);
|
2019-04-18 03:49:24 +00:00
|
|
|
if (!end_of_level_domain)
|
|
|
|
{
|
|
|
|
end_of_level_domain = end;
|
|
|
|
}
|
2019-04-17 12:27:15 +00:00
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
if (tldLookup::isValid(last_3_periods[1] + 1, end_of_level_domain - last_3_periods[1] - 1) != nullptr)
|
2019-04-18 03:49:24 +00:00
|
|
|
{
|
2019-04-17 06:21:26 +00:00
|
|
|
res_data += last_3_periods[2] + 1 - begin;
|
|
|
|
res_size = last_3_periods[1] - last_3_periods[2] - 1;
|
2019-04-26 05:01:02 +00:00
|
|
|
}
|
2019-04-18 03:49:24 +00:00
|
|
|
else
|
|
|
|
{
|
2019-04-17 06:21:26 +00:00
|
|
|
res_data += last_3_periods[1] + 1 - begin;
|
|
|
|
res_size = last_3_periods[0] - last_3_periods[1] - 1;
|
2019-04-18 03:49:24 +00:00
|
|
|
}
|
2019-04-17 06:21:26 +00:00
|
|
|
}
|
2018-11-25 23:24:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|