diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 540072dd045..74a41811ebd 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -3,6 +3,7 @@ #include "protocol.h" #include #include +#include namespace DB @@ -31,22 +32,51 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - Pos slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos < end - 1 && *(slash_pos + 1) == '/') - pos = slash_pos + 2; - else - pos = data; - - if (pos != data) + if (*pos == '/' && *(pos + 1) == '/') + pos += 2; + else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme { - StringRef scheme = getURLScheme(data, pos - data - 2); - Pos scheme_end = data + scheme.size; - if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) - return StringRef{}; + for (++pos; pos < end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch(*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exit_loop; + } + } + } + exit_loop:; + if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } - auto start_of_host = pos; Pos dot_pos = nullptr; + auto start_of_host = pos; for (; pos < end; ++pos) { switch (*pos) diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index 05e97417263..364115011f9 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,4 +1,4 @@ - 4508175 712434 + 4508153 712428 auto.ru 576845 8935 yandex.ru 410776 111278 korer.ru 277987 0