From 6b78da6f02dd2859f047456a98c6bfd8a31cfc1d Mon Sep 17 00:00:00 2001 From: zzsmdfj Date: Thu, 17 Feb 2022 11:32:47 +0800 Subject: [PATCH] to issue/#31092_add_encodeURLComponent_function --- src/Functions/URL/decodeURLComponent.cpp | 29 ++++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/Functions/URL/decodeURLComponent.cpp b/src/Functions/URL/decodeURLComponent.cpp index 5fff9c2920a..64f117708c8 100644 --- a/src/Functions/URL/decodeURLComponent.cpp +++ b/src/Functions/URL/decodeURLComponent.cpp @@ -17,7 +17,7 @@ static size_t encodeURL(const char * src, size_t src_size, char * dst, bool spac for (size_t i = 0; i < src_size - 1; i++) { if ((src[i] >= '0' && src[i] <= '9') || (src[i] >= 'a' && src[i] <= 'z') || (src[i] >= 'A' && src[i] <= 'Z') - || src[i] == '-' || src[i] == '_' || src[i] == '.' || src[i] == '!' || src[i] == '~' || (src[i] >= '\'' && src[i] <= '*')) + || src[i] == '-' || src[i] == '_' || src[i] == '.' || src[i] == '~') { *dst_pos++ = src[i]; } @@ -106,15 +106,24 @@ static size_t decodeURL(const char * src, size_t src_size, char * dst, bool plus return dst_pos - dst; } +enum URLCodeStrategy +{ + encode, + decode +}; /// Percent decode of URL data. -template +template struct CodeURLComponentImpl { static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { - res_data.resize(data.size()); + if (code_strategy == encode) + //the destination(res_data) string is at most three times the length of the source string + res_data.resize(data.size() * 3); + else + res_data.resize(data.size()); size_t size = offsets.size(); res_offsets.resize(size); @@ -126,10 +135,10 @@ struct CodeURLComponentImpl const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] - prev_offset; size_t dst_size; - if (encode) - dst_size = encodeURL(src_data, src_size, reinterpret_cast(res_data.data() + res_offset), plus_as_space); + if (code_strategy == encode) + dst_size = encodeURL(src_data, src_size, reinterpret_cast(res_data.data() + res_offset), plus_space_swap); else - dst_size = decodeURL(src_data, src_size, reinterpret_cast(res_data.data() + res_offset), plus_as_space); + dst_size = decodeURL(src_data, src_size, reinterpret_cast(res_data.data() + res_offset), plus_space_swap); res_offset += dst_size; res_offsets[i] = res_offset; prev_offset = offsets[i]; @@ -149,10 +158,10 @@ struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent struct NameEncodeURLComponent { static constexpr auto name = "encodeURLComponent"; }; struct NameDecodeURLFormComponent { static constexpr auto name = "decodeURLFormComponent"; }; struct NameEncodeURLFormComponent { static constexpr auto name = "encodeURLFormComponent"; }; -using FunctionDecodeURLComponent = FunctionStringToString, NameDecodeURLComponent>; -using FunctionEncodeURLComponent = FunctionStringToString, NameEncodeURLComponent>; -using FunctionDecodeURLFormComponent = FunctionStringToString, NameDecodeURLFormComponent>; -using FunctionEncodeURLFormComponent = FunctionStringToString, NameEncodeURLFormComponent>; +using FunctionDecodeURLComponent = FunctionStringToString, NameDecodeURLComponent>; +using FunctionEncodeURLComponent = FunctionStringToString, NameEncodeURLComponent>; +using FunctionDecodeURLFormComponent = FunctionStringToString, NameDecodeURLFormComponent>; +using FunctionEncodeURLFormComponent = FunctionStringToString, NameEncodeURLFormComponent>; void registerFunctionEncodeAndDecodeURLComponent(FunctionFactory & factory) {