From 219de205e39e508825f6cfe7ee1f110d98a321c8 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 20 Feb 2018 09:34:50 +0800 Subject: [PATCH] ISSUES-995 fix cut www --- dbms/src/Functions/FunctionsURL.h | 43 +++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 67b2df2fc30..0614f66a809 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -396,18 +396,39 @@ struct ExtractWWW Pos pos = data; Pos end = pos + size; - Pos tmp; - size_t protocol_length; - ExtractProtocol::execute(data, size, tmp, protocol_length); - pos += protocol_length + 3; - - if (pos >= end || pos[-1] != '/' || pos[-2] != '/') - return; - - if (pos + 4 < end && !strncmp(pos, "www.", 4)) + if (nullptr != (pos = strchr(pos, '/'))) { - res_data = pos; - res_size = 4; + if (pos != data) + { + Pos tmp; + size_t protocol_length; + ExtractProtocol::execute(data, size, tmp, protocol_length); + + if (pos != data + protocol_length + 1) + return; + } + + if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + return; + + const char *st = pos; + for (; pos < end; ++pos) + { + if (*pos == '@') + { + st = pos + 1; + } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + { + break; + } + } + + + if (st + 4 < end && !strncmp(st, "www.", 4)) + { + res_data = st; + res_size = 4; + } } } };