mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
add encodeXMLComponent function
This commit is contained in:
parent
8e615a1d8d
commit
ab5f397022
142
src/Functions/encodeXML.cpp
Normal file
142
src/Functions/encodeXML.cpp
Normal file
@ -0,0 +1,142 @@
|
||||
#include <cstring>

#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <common/find_symbols.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Name tag passed as a template argument to FunctionStringToString (see the
/// FunctionEncodeXML alias); carries the name of the SQL function.
struct EncodeXMLName
{
    static constexpr const char * name = "encodeXMLComponent";
};
|
||||
|
||||
class FunctionEncodeXMLImpl
|
||||
{
|
||||
public:
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
res_data.resize(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
size_t res_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
size_t src_size = offsets[i] - prev_offset;
|
||||
size_t dst_size = execute(src_data, src_size, reinterpret_cast<char *>(res_data.data() + res_offset));
|
||||
|
||||
res_offset += dst_size;
|
||||
res_offsets[i] = res_offset;
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
|
||||
res_data.resize(res_offset);
|
||||
}
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception("Function encodeXML cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
private:
|
||||
static size_t execute(const char * src, size_t src_size, char * dst)
|
||||
{
|
||||
const char * src_prev_pos = src;
|
||||
const char * src_curr_pos = src;
|
||||
const char * src_end = src + src_size;
|
||||
char * dst_pos = dst;
|
||||
|
||||
while (true)
|
||||
{
|
||||
src_curr_pos = find_first_symbols<'<', '&', '>', '"', '\''>(src_curr_pos, src_end);
|
||||
|
||||
if (src_curr_pos == src_end)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (*src_curr_pos == '<')
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, "<", 4);
|
||||
dst_pos += 4;
|
||||
src_prev_pos = src_curr_pos + 1;
|
||||
src_curr_pos++;
|
||||
}
|
||||
else if (*src_curr_pos == '&')
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, "&", 5);
|
||||
dst_pos += 5;
|
||||
src_prev_pos = src_curr_pos + 1;
|
||||
src_curr_pos++;
|
||||
}
|
||||
else if (*src_curr_pos == '>')
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, ">", 4);
|
||||
dst_pos += 4;
|
||||
src_prev_pos = src_curr_pos + 1;
|
||||
src_curr_pos++;
|
||||
}
|
||||
else if (*src_curr_pos == '"')
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, """, 6);
|
||||
dst_pos += 6;
|
||||
src_prev_pos = src_curr_pos + 1;
|
||||
src_curr_pos++;
|
||||
}
|
||||
else if (*src_curr_pos == '\'')
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, "'", 6);
|
||||
dst_pos += 6;
|
||||
src_prev_pos = src_curr_pos + 1;
|
||||
src_curr_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (src_prev_pos < src_curr_pos)
|
||||
{
|
||||
size_t bytes_to_copy = src_curr_pos - src_prev_pos;
|
||||
memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
|
||||
dst_pos += bytes_to_copy;
|
||||
}
|
||||
|
||||
return dst_pos - dst;
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionEncodeXML = FunctionStringToString<FunctionEncodeXMLImpl, EncodeXMLName>;
|
||||
|
||||
}
|
||||
|
||||
/// Adds the encodeXMLComponent function (FunctionEncodeXML) to the given factory.
void registerFunctionEncodeXML(FunctionFactory & factory)
{
    factory.registerFunction<FunctionEncodeXML>();
}
|
||||
}
|
@ -33,6 +33,7 @@ void registerFunctionRegexpQuoteMeta(FunctionFactory &);
|
||||
void registerFunctionNormalizeQuery(FunctionFactory &);
|
||||
void registerFunctionNormalizedQueryHash(FunctionFactory &);
|
||||
void registerFunctionCountMatches(FunctionFactory &);
|
||||
void registerFunctionEncodeXML(FunctionFactory & factory);
|
||||
|
||||
#if USE_BASE64
|
||||
void registerFunctionBase64Encode(FunctionFactory &);
|
||||
@ -68,6 +69,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionNormalizeQuery(factory);
|
||||
registerFunctionNormalizedQueryHash(factory);
|
||||
registerFunctionCountMatches(factory);
|
||||
registerFunctionEncodeXML(factory);
|
||||
#if USE_BASE64
|
||||
registerFunctionBase64Encode(factory);
|
||||
registerFunctionBase64Decode(factory);
|
||||
|
@ -224,6 +224,7 @@ SRCS(
|
||||
dumpColumnStructure.cpp
|
||||
e.cpp
|
||||
empty.cpp
|
||||
encodeXML.cpp
|
||||
encrypt.cpp
|
||||
endsWith.cpp
|
||||
equals.cpp
|
||||
|
4
tests/queries/0_stateless/01600_encode_XML.reference
Normal file
4
tests/queries/0_stateless/01600_encode_XML.reference
Normal file
@ -0,0 +1,4 @@
|
||||
Hello, &quot;world&quot;!
|
||||
&lt;123&gt;
|
||||
&amp;clickhouse
|
||||
&apos;foo&apos;
|
4
tests/queries/0_stateless/01600_encode_XML.sql
Normal file
4
tests/queries/0_stateless/01600_encode_XML.sql
Normal file
@ -0,0 +1,4 @@
|
||||
SELECT encodeXMLComponent('Hello, "world"!');
|
||||
SELECT encodeXMLComponent('<123>');
|
||||
SELECT encodeXMLComponent('&clickhouse');
|
||||
SELECT encodeXMLComponent('\'foo\'');
|
Loading…
Reference in New Issue
Block a user