Refactor writeAnyMarkdownEscapedString and add more info to the docs.

This commit is contained in:
irenjj 2023-08-29 23:54:40 +08:00
parent 13322f1a90
commit 473ba91c21
7 changed files with 142 additions and 193 deletions

View File

@ -205,16 +205,6 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char
return return_mode == ReturnMode::End ? end : nullptr;
}
/// Byte-by-byte search over [begin, end) for the first character accepted by
/// maybe_negate<positive>(is_in<symbols...>(c)).
/// NOTE(review): presumably `positive == false` inverts the membership test - confirm against maybe_negate.
/// Despite the `sse2` in the name, this variant contains no vector code: it is a plain scalar loop.
/// On a miss the result is `end` for ReturnMode::End and nullptr for every other mode.
template <bool positive, ReturnMode return_mode, char... symbols>
inline const char * find_first_symbols_sse2_markdown(const char * const begin, const char * const end)
{
    const char * cursor = begin;
    while (cursor < end)
    {
        if (maybe_negate<positive>(is_in<symbols...>(*cursor)))
            return cursor;
        ++cursor;
    }

    /// Nothing matched: the miss value is fixed at compile time by return_mode.
    if constexpr (return_mode == ReturnMode::End)
        return end;
    else
        return nullptr;
}
template <bool positive, ReturnMode return_mode, char... symbols>
inline const char * find_last_symbols_sse2(const char * const begin, const char * const end)
@ -356,12 +346,6 @@ inline const char * find_first_symbols_dispatch(const std::string_view haystack,
return find_first_symbols_sse2<positive, return_mode>(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
}
/// Dispatch layer for the Markdown symbol search.
/// There is only one implementation to choose from, so the call is forwarded
/// unchanged to find_first_symbols_sse2_markdown with the same template arguments.
template <bool positive, ReturnMode return_mode, char... symbols>
inline const char * find_first_symbols_dispatch_markdown(const char * begin, const char * end)
{
    const char * const found = find_first_symbols_sse2_markdown<positive, return_mode, symbols...>(begin, end);
    return found;
}
}
@ -384,12 +368,6 @@ inline const char * find_first_symbols(std::string_view haystack, const SearchSy
return detail::find_first_symbols_dispatch<true, detail::ReturnMode::End>(haystack, symbols);
}
/// Public entry point of the Markdown symbol search.
/// Looks for the first occurrence of any of `symbols` in [begin, end); the search is
/// instantiated as positive with ReturnMode::End, i.e. the dispatcher is asked to
/// report a miss as `end` rather than as a null pointer.
template <char... symbols>
inline const char * find_first_symbols_markdown(const char * begin, const char * end)
{
    const char * const first_hit
        = detail::find_first_symbols_dispatch_markdown<true, detail::ReturnMode::End, symbols...>(begin, end);
    return first_hit;
}
template <char... symbols>
inline const char * find_first_not_symbols(const char * begin, const char * end)
{

View File

@ -638,6 +638,12 @@ Disabled by default.
When enabled, special characters in the Markdown output are escaped with a backslash.
[CommonMark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped:
```
\! \" \# \$ \% \& \' \( \) \* \+ \, \- \. \/ \: \; \< \= \> \? \@ \[ \\ \] \^ \_ \` \{ \| \} \~
```
Possible values:
+ 0 — Disable.

View File

@ -215,17 +215,4 @@ void SerializationAggregateFunction::deserializeTextCSV(IColumn & column, ReadBu
deserializeFromString(function, column, s, version);
}
/// Markdown text output for one row of an aggregate-function column.
/// With output_format_markdown_escape_special_characters enabled, the state is first
/// rendered via serializeToString and that text is written with Markdown escaping.
/// Otherwise the row is written exactly as serializeTextEscaped would write it.
void SerializationAggregateFunction::serializeTextMarkdown(
    const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{
    if (!settings.output_format_markdown_escape_special_characters)
    {
        /// Escaping switched off: defer to the generic escaped-text serialization.
        serializeTextEscaped(column, row_num, ostr, settings);
        return;
    }

    writeMarkdownEscapedString(serializeToString(function, column, row_num, version), ostr);
}
}

View File

@ -41,8 +41,6 @@ public:
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
};
}

View File

@ -216,7 +216,7 @@ void SerializationFixedString::serializeTextMarkdown(
if (settings.output_format_markdown_escape_special_characters)
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeAnyMarkdownEscapedString<'\''>(pos, pos + n, ostr);
writeAnyMarkdownEscapedString(pos, pos + n, ostr);
}
else
{

View File

@ -365,13 +365,9 @@ void SerializationString::serializeTextMarkdown(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (settings.output_format_markdown_escape_special_characters)
{
writeMarkdownEscapedString(assert_cast<const ColumnString &>(column).getDataAt(row_num).toView(), ostr);
}
else
{
serializeTextEscaped(column, row_num, ostr, settings);
}
}
}

View File

@ -380,159 +380,143 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
}
}
template <char quote_character, bool escape_quote_with_quote = false, bool escape_backslash_with_backslash = true>
void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf)
/// Define special characters in Markdown according to the standards specified by CommonMark.
inline void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf)
{
const char * pos = begin;
while (true)
for (const char * it = begin; it != end; ++it)
{
const char * next_pos = find_first_symbols_markdown<
'\b',
'\f',
'\n',
'\r',
'\t',
'\0',
'\\',
quote_character,
'`',
'*',
'_',
'{',
'}',
'[',
']',
'<',
'>',
'(',
')',
'#',
'+',
'-',
'.',
'!',
'|'>(pos, end);
if (next_pos == end)
switch (*it)
{
buf.write(pos, next_pos - pos);
break;
}
else
{
buf.write(pos, next_pos - pos);
pos = next_pos;
switch (*pos)
{
case '\b':
writeChar('\\', buf);
writeChar('b', buf);
break;
case '\f':
writeChar('\\', buf);
writeChar('f', buf);
break;
case '\n':
writeChar('\\', buf);
writeChar('n', buf);
break;
case '\r':
writeChar('\\', buf);
writeChar('r', buf);
break;
case '\t':
writeChar('\\', buf);
writeChar('t', buf);
break;
case '\0':
writeChar('\\', buf);
writeChar('0', buf);
break;
case '\\':
if constexpr (escape_backslash_with_backslash)
writeChar('\\', buf);
writeChar('\\', buf);
break;
case quote_character: {
if constexpr (escape_quote_with_quote)
writeChar(quote_character, buf);
else
writeChar('\\', buf);
writeChar(quote_character, buf);
break;
}
case '`':
writeChar('`', buf);
writeChar('`', buf);
break;
case '*':
writeChar('\\', buf);
writeChar('*', buf);
break;
case '_':
writeChar('\\', buf);
writeChar('_', buf);
break;
case '{':
writeChar('\\', buf);
writeChar('{', buf);
break;
case '}':
writeChar('\\', buf);
writeChar('}', buf);
break;
case '[':
writeChar('\\', buf);
writeChar('[', buf);
break;
case ']':
writeChar('\\', buf);
writeChar(']', buf);
break;
case '<':
writeChar('\\', buf);
writeChar('<', buf);
break;
case '>':
writeChar('\\', buf);
writeChar('>', buf);
break;
case '(':
writeChar('\\', buf);
writeChar('(', buf);
break;
case ')':
writeChar('\\', buf);
writeChar(')', buf);
break;
case '#':
writeChar('\\', buf);
writeChar('#', buf);
break;
case '+':
writeChar('\\', buf);
writeChar('+', buf);
break;
case '-':
writeChar('\\', buf);
writeChar('-', buf);
break;
case '.':
writeChar('\\', buf);
writeChar('.', buf);
break;
case '!':
writeChar('\\', buf);
writeChar('!', buf);
break;
case '|':
writeChar('\\', buf);
writeChar('|', buf);
break;
default:
writeChar(*pos, buf);
}
++pos;
case '!':
writeChar('\\', buf);
writeChar('!', buf);
break;
case '"':
writeChar('\\', buf);
writeChar('"', buf);
break;
case '#':
writeChar('\\', buf);
writeChar('#', buf);
break;
case '$':
writeChar('\\', buf);
writeChar('$', buf);
break;
case '%':
writeChar('\\', buf);
writeChar('%', buf);
break;
case '&':
writeChar('\\', buf);
writeChar('&', buf);
break;
case '\'':
writeChar('\\', buf);
writeChar('\'', buf);
break;
case '(':
writeChar('\\', buf);
writeChar('(', buf);
break;
case ')':
writeChar('\\', buf);
writeChar(')', buf);
break;
case '*':
writeChar('\\', buf);
writeChar('*', buf);
break;
case '+':
writeChar('\\', buf);
writeChar('+', buf);
break;
case ',':
writeChar('\\', buf);
writeChar(',', buf);
break;
case '-':
writeChar('\\', buf);
writeChar('-', buf);
break;
case '.':
writeChar('\\', buf);
writeChar('.', buf);
break;
case '/':
writeChar('\\', buf);
writeChar('/', buf);
break;
case ':':
writeChar('\\', buf);
writeChar(':', buf);
break;
case ';':
writeChar('\\', buf);
writeChar(';', buf);
break;
case '<':
writeChar('\\', buf);
writeChar('<', buf);
break;
case '=':
writeChar('\\', buf);
writeChar('=', buf);
break;
case '>':
writeChar('\\', buf);
writeChar('>', buf);
break;
case '?':
writeChar('\\', buf);
writeChar('?', buf);
break;
case '@':
writeChar('\\', buf);
writeChar('@', buf);
break;
case '[':
writeChar('\\', buf);
writeChar('[', buf);
break;
case '\\':
writeChar('\\', buf);
writeChar('\\', buf);
break;
case ']':
writeChar('\\', buf);
writeChar(']', buf);
break;
case '^':
writeChar('\\', buf);
writeChar('^', buf);
break;
case '_':
writeChar('\\', buf);
writeChar('_', buf);
break;
case '`':
writeChar('\\', buf);
writeChar('`', buf);
break;
case '{':
writeChar('\\', buf);
writeChar('{', buf);
break;
case '|':
writeChar('\\', buf);
writeChar('|', buf);
break;
case '}':
writeChar('\\', buf);
writeChar('}', buf);
break;
case '~':
writeChar('\\', buf);
writeChar('~', buf);
break;
default:
writeChar(*it, buf);
}
}
}
@ -602,7 +586,7 @@ inline void writeEscapedString(std::string_view ref, WriteBuffer & buf)
/// Writes `size` bytes starting at `str` to buf with Markdown special characters escaped.
/// Convenience overload that turns (pointer, length) into the [begin, end) range
/// expected by writeAnyMarkdownEscapedString.
inline void writeMarkdownEscapedString(const char * str, size_t size, WriteBuffer & buf)
{
    /// The range must be emitted exactly once, and the escaper is called without
    /// template arguments (it no longer takes a quote character).
    writeAnyMarkdownEscapedString(str, str + size, buf);
}
inline void writeMarkdownEscapedString(std::string_view ref, WriteBuffer & buf)