From d3657d311b976280388e8c4c1bb878d15d1977be Mon Sep 17 00:00:00 2001 From: artpaul Date: Thu, 15 Dec 2016 22:12:41 +0500 Subject: [PATCH] fix decodeUrl; decode all values; add more tests --- dbms/src/Functions/FunctionsURL.cpp | 47 ++++++++++--------- ...eference => 00398_url_functions.reference} | 3 ++ ..._functions.sql => 00398_url_functions.sql} | 3 ++ 3 files changed, 31 insertions(+), 22 deletions(-) rename dbms/tests/queries/0_stateless/{00395_url_functions.reference => 00398_url_functions.reference} (76%) rename dbms/tests/queries/0_stateless/{00395_url_functions.sql => 00398_url_functions.sql} (83%) diff --git a/dbms/src/Functions/FunctionsURL.cpp b/dbms/src/Functions/FunctionsURL.cpp index 07bb497b03d..dcaa53b45dd 100644 --- a/dbms/src/Functions/FunctionsURL.cpp +++ b/dbms/src/Functions/FunctionsURL.cpp @@ -5,12 +5,13 @@ namespace DB { -template -static void decodeUrl(const StringView & url, T & dest, size_t & offset) +/// The caller must ensure that the buffer pointed to by dst can hold at least url.size() bytes; the decoded length is returned. 
+static size_t decodeUrl(const StringView & url, char* dst) { const char* p = url.data(); const char* st = url.data(); - const char* end = url.data() + url.size(); + const char* const end = url.data() + url.size(); + char* buf = dst; for (; p < end; ++p) { @@ -24,15 +25,11 @@ static void decodeUrl(const StringView & url, T & dest, size_t & offset) { unsigned char digit = (h << 4) + l; - if (digit < 127) { - dest.resize(dest.size() + p - st + 1); - memcpy(&dest[offset], st, p - st); - offset += p - st; - dest[offset] = digit; - offset++; - - st = p + 3; - } + memcpy(buf, st, p - st); + buf += p - st; + *buf = digit; + ++buf; + st = p + 3; } p += 2; @@ -40,17 +37,16 @@ static void decodeUrl(const StringView & url, T & dest, size_t & offset) if (st == url.data()) { - dest.resize(dest.size() + url.size() + 1); - memcpy(&dest[offset], url.data(), url.size()); - offset += url.size() + 1; - dest[offset - 1] = 0; + memcpy(buf, url.data(), url.size()); + return url.size(); } else if (st < p) { - dest.resize(dest.size() + p - st); - memcpy(&dest[offset], st, p - st); - offset += p - st; + memcpy(buf, st, p - st); + buf += p - st; } + + return buf - dst; } @@ -90,8 +86,14 @@ void DecodeURLComponentImpl::vector(const ColumnString::Chars_t & data, const Co { const char * current = reinterpret_cast(&data[prev_offset]); const StringView url(current, offsets[i] - prev_offset - 1); + size_t prev_size = res_data.size(); - decodeUrl(url, res_data, res_offset); + res_data.resize(prev_size + url.size() + 1); + size_t len = decodeUrl(url, reinterpret_cast(res_data.data() + res_offset)); + res_data.resize(prev_size + len); + res_offset += len; + res_data[res_offset] = 0; + res_offset++; res_offsets[i] = res_offset; prev_offset = offsets[i]; @@ -102,8 +104,9 @@ void DecodeURLComponentImpl::vector(const ColumnString::Chars_t & data, const Co void DecodeURLComponentImpl::constant(const std::string & data, std::string & res_data) { - size_t offset = 0; - decodeUrl(data, res_data, offset); + 
res_data.resize(data.size()); + size_t len = decodeUrl(data, &res_data[0]); + res_data.resize(len); } diff --git a/dbms/tests/queries/0_stateless/00395_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference similarity index 76% rename from dbms/tests/queries/0_stateless/00395_url_functions.reference rename to dbms/tests/queries/0_stateless/00398_url_functions.reference index eee26e093b1..3d2914a5407 100644 --- a/dbms/tests/queries/0_stateless/00395_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -12,4 +12,7 @@ com ru ru +П +%D%9 +/?query=hello world+foo+bar /?query=hello world+foo+bar diff --git a/dbms/tests/queries/0_stateless/00395_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql similarity index 83% rename from dbms/tests/queries/0_stateless/00395_url_functions.sql rename to dbms/tests/queries/0_stateless/00398_url_functions.sql index ec9b3eeeaad..029465ccffa 100644 --- a/dbms/tests/queries/0_stateless/00395_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -15,4 +15,7 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; +SELECT decodeURLComponent('%D0%9F'); +SELECT decodeURLComponent('%D%9'); SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; +SELECT decodeURLComponent(materialize(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path;