Trying to do everything in one pass

This commit is contained in:
alesapin 2019-06-25 18:54:47 +03:00
parent fff18f78db
commit ff72cf4893
2 changed files with 43 additions and 13 deletions

View File

@ -3,6 +3,7 @@
#include "protocol.h" #include "protocol.h"
#include <common/find_symbols.h> #include <common/find_symbols.h>
#include <cstring> #include <cstring>
#include <Common/StringUtils/StringUtils.h>
namespace DB namespace DB
@ -31,22 +32,51 @@ inline StringRef getURLHost(const char * data, size_t size)
Pos pos = data; Pos pos = data;
Pos end = data + size; Pos end = data + size;
Pos slash_pos = find_first_symbols<'/'>(pos, end); if (*pos == '/' && *(pos + 1) == '/')
if (slash_pos < end - 1 && *(slash_pos + 1) == '/') pos += 2;
pos = slash_pos + 2; else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme
else
pos = data;
if (pos != data)
{ {
StringRef scheme = getURLScheme(data, pos - data - 2); for (++pos; pos < end; ++pos)
Pos scheme_end = data + scheme.size; {
if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) if (!isAlphaNumericASCII(*pos))
return StringRef{}; {
switch(*pos)
{
case '.':
case '-':
case '+':
break;
case ' ': /// restricted symbols
case '\t':
case '<':
case '>':
case '%':
case '{':
case '}':
case '|':
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
case '&':
return StringRef{};
default:
goto exit_loop;
}
}
}
exit_loop:;
if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/')
pos += 3;
else
pos = data;
} }
auto start_of_host = pos;
Pos dot_pos = nullptr; Pos dot_pos = nullptr;
auto start_of_host = pos;
for (; pos < end; ++pos) for (; pos < end; ++pos)
{ {
switch (*pos) switch (*pos)

View File

@ -1,4 +1,4 @@
4508175 712434 4508153 712428
auto.ru 576845 8935 auto.ru 576845 8935
yandex.ru 410776 111278 yandex.ru 410776 111278
korer.ru 277987 0 korer.ru 277987 0