mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 17:20:50 +00:00
Merge pull request #3628 from yandex/first-significant-subdomain-performance
Attempt to fight back performance of firstSignificantSubdomain function
This commit is contained in:
commit
2e15e52b74
@ -202,18 +202,28 @@ struct ExtractFirstSignificantSubdomain
|
||||
if (!last_3_periods[2])
|
||||
last_3_periods[2] = begin - 1;
|
||||
|
||||
if (!strncmp(last_3_periods[1] + 1, "com.", 4) /// Note that in ColumnString every value has zero byte after it.
|
||||
|| !strncmp(last_3_periods[1] + 1, "net.", 4)
|
||||
|| !strncmp(last_3_periods[1] + 1, "org.", 4)
|
||||
|| !strncmp(last_3_periods[1] + 1, "co.", 3)
|
||||
|| !strncmp(last_3_periods[1] + 1, "biz.", 4)
|
||||
|| !strncmp(last_3_periods[1] + 1, "gov.", 4)
|
||||
|| !strncmp(last_3_periods[1] + 1, "mil.", 4)
|
||||
|| !strncmp(last_3_periods[1] + 1, "edu.", 4))
|
||||
size_t size_of_second_subdomain_plus_period = last_3_periods[0] - last_3_periods[1];
|
||||
if (size_of_second_subdomain_plus_period == 4 || size_of_second_subdomain_plus_period == 3)
|
||||
{
|
||||
res_data += last_3_periods[2] + 1 - begin;
|
||||
res_size = last_3_periods[1] - last_3_periods[2] - 1;
|
||||
return;
|
||||
/// We will key by four bytes that are either ".xyz" or ".xy.".
|
||||
UInt32 key = unalignedLoad<UInt32>(last_3_periods[1]);
|
||||
|
||||
/// NOTE: assuming little endian.
|
||||
/// NOTE: does the compiler generate SIMD code?
|
||||
/// NOTE: for a larger number of cases we can use a perfect hash table (see 'gperf' as an example).
|
||||
if ( key == '.' + 'c' * 0x100U + 'o' * 0x10000U + 'm' * 0x1000000U
|
||||
|| key == '.' + 'n' * 0x100U + 'e' * 0x10000U + 't' * 0x1000000U
|
||||
|| key == '.' + 'o' * 0x100U + 'r' * 0x10000U + 'g' * 0x1000000U
|
||||
|| key == '.' + 'b' * 0x100U + 'i' * 0x10000U + 'z' * 0x1000000U
|
||||
|| key == '.' + 'g' * 0x100U + 'o' * 0x10000U + 'v' * 0x1000000U
|
||||
|| key == '.' + 'm' * 0x100U + 'i' * 0x10000U + 'l' * 0x1000000U
|
||||
|| key == '.' + 'e' * 0x100U + 'd' * 0x10000U + 'u' * 0x1000000U
|
||||
|| key == '.' + 'c' * 0x100U + 'o' * 0x10000U + '.' * 0x1000000U)
|
||||
{
|
||||
res_data += last_3_periods[2] + 1 - begin;
|
||||
res_size = last_3_periods[1] - last_3_periods[2] - 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
res_data += last_3_periods[1] + 1 - begin;
|
||||
|
Loading…
Reference in New Issue
Block a user