Fix topLevelDomain() for IDN hosts

This commit is contained in:
Azat Khuzhin 2021-06-09 02:30:29 +03:00
parent 38ac83dff9
commit e0c1780370
4 changed files with 12 additions and 3 deletions

View File

@ -28,7 +28,10 @@ struct ExtractTopLevelDomain
return;
/// For IPv4 addresses select nothing.
if (last_dot[1] <= '9')
///
/// NOTE: it is safe to access last_dot[1]
/// since getURLHost() will not return a host if there is no symbol after the dot.
if (isNumericASCII(last_dot[1]))
return;
res_data = last_dot + 1;

View File

@ -35,6 +35,9 @@ ru
com
com
com
рф
====PATH====
П
%D%9

View File

@ -38,6 +38,9 @@ SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
SELECT topLevelDomain('//www.example.com') AS Domain;
SELECT topLevelDomain('www.example.com') as Domain;
SELECT topLevelDomain('example.com') as Domain;
SELECT topLevelDomain('example.рф') as Domain;
SELECT topLevelDomain('example.') as Domain;
SELECT topLevelDomain('example') as Domain;
SELECT '====PATH====';
SELECT decodeURLComponent('%D0%9F');

View File

@ -1,5 +1,5 @@
ru 262914 69218
92101 89421
91872 89417
com 63298 30285
ua 29037 17475
html 25079 15039
@ -53,6 +53,7 @@ eu 237 234
liveinteria 218 218
to 215 213
mamba 214 214
рф 209 204
auto-supers 208 208
sberbank 207 207
tj 205 205
@ -97,4 +98,3 @@ loveplaceOfSearchplus 111 111
nl 111 111
bstatistika 107 107
br 102 102
sport 99 99