diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index ad8fbd126d3..fda94edaa88 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -205,16 +205,6 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char return return_mode == ReturnMode::End ? end : nullptr; } -template -inline const char * find_first_symbols_sse2_markdown(const char * const begin, const char * const end) -{ - const char * pos = begin; - for (; pos < end; ++pos) - if (maybe_negate(is_in(*pos))) - return pos; - - return return_mode == ReturnMode::End ? end : nullptr; -} template inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) @@ -356,12 +346,6 @@ inline const char * find_first_symbols_dispatch(const std::string_view haystack, return find_first_symbols_sse2(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size()); } -template -inline const char * find_first_symbols_dispatch_markdown(const char * begin, const char * end) -{ - return find_first_symbols_sse2_markdown(begin, end); -} - } @@ -384,12 +368,6 @@ inline const char * find_first_symbols(std::string_view haystack, const SearchSy return detail::find_first_symbols_dispatch(haystack, symbols); } -template -inline const char * find_first_symbols_markdown(const char * begin, const char * end) -{ - return detail::find_first_symbols_dispatch_markdown(begin, end); -} - template inline const char * find_first_not_symbols(const char * begin, const char * end) { diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index a54994aa363..9ac91d15583 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -638,6 +638,12 @@ Disabled by default. When enabled, escape special characters in Markdown. +[CommonMark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped: + +``` +\! \" \# \$ \% \& \' \( \) \* \+ \, \- \. \/ \: \; \< \= \> \? \@ \[ \\ \] \^ \_ \` \{ \| \} \~ +``` + Possible values: + 0 — Disable. diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index bbc586480e4..c482c9623e9 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -215,17 +215,4 @@ void SerializationAggregateFunction::deserializeTextCSV(IColumn & column, ReadBu deserializeFromString(function, column, s, version); } -void SerializationAggregateFunction::serializeTextMarkdown( - const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const -{ - if (settings.output_format_markdown_escape_special_characters) - { - writeMarkdownEscapedString(serializeToString(function, column, row_num, version), ostr); - } - else - { - serializeTextEscaped(column, row_num, ostr, settings); - } -} - } diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 6344054111a..4212298bbc1 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -41,8 +41,6 @@ public: void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - - void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index f2de1c530e3..0939c0bf05b 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -216,7 +216,7 @@ void SerializationFixedString::serializeTextMarkdown( if (settings.output_format_markdown_escape_special_characters) { const char * pos = reinterpret_cast(&assert_cast(column).getChars()[n * row_num]); - writeAnyMarkdownEscapedString<'\''>(pos, pos + n, ostr); + writeAnyMarkdownEscapedString(pos, pos + n, ostr); } else { diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 0e4c0407fe2..28ebdde3258 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -365,13 +365,9 @@ void SerializationString::serializeTextMarkdown( const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (settings.output_format_markdown_escape_special_characters) - { writeMarkdownEscapedString(assert_cast(column).getDataAt(row_num).toView(), ostr); - } else - { serializeTextEscaped(column, row_num, ostr, settings); - } } } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index eaeaa806b68..02a24aeb01f 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -380,159 +380,143 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b } } -template -void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf) +/// Define special characters in Markdown according to the standards specified by CommonMark. +inline void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf) { - const char * pos = begin; - while (true) + for (const char * it = begin; it != end; ++it) { - const char * next_pos = find_first_symbols_markdown< - '\b', - '\f', - '\n', - '\r', - '\t', - '\0', - '\\', - quote_character, - '`', - '*', - '_', - '{', - '}', - '[', - ']', - '<', - '>', - '(', - ')', - '#', - '+', - '-', - '.', - '!', - '|'>(pos, end); - - if (next_pos == end) + switch (*it) { - buf.write(pos, next_pos - pos); - break; - } - else - { - buf.write(pos, next_pos - pos); - pos = next_pos; - switch (*pos) - { - case '\b': - writeChar('\\', buf); - writeChar('b', buf); - break; - case '\f': - writeChar('\\', buf); - writeChar('f', buf); - break; - case '\n': - writeChar('\\', buf); - writeChar('n', buf); - break; - case '\r': - writeChar('\\', buf); - writeChar('r', buf); - break; - case '\t': - writeChar('\\', buf); - writeChar('t', buf); - break; - case '\0': - writeChar('\\', buf); - writeChar('0', buf); - break; - case '\\': - if constexpr (escape_backslash_with_backslash) - writeChar('\\', buf); - writeChar('\\', buf); - break; - case quote_character: { - if constexpr (escape_quote_with_quote) - writeChar(quote_character, buf); - else - writeChar('\\', buf); - writeChar(quote_character, buf); - break; - } - case '`': - writeChar('`', buf); - writeChar('`', buf); - break; - case '*': - writeChar('\\', buf); - writeChar('*', buf); - break; - case '_': - writeChar('\\', buf); - writeChar('_', buf); - break; - case '{': - writeChar('\\', buf); - writeChar('{', buf); - break; - case '}': - writeChar('\\', buf); - writeChar('}', buf); - break; - case '[': - writeChar('\\', buf); - writeChar('[', buf); - break; - case ']': - writeChar('\\', buf); - writeChar(']', buf); - break; - case '<': - writeChar('\\', buf); - writeChar('<', buf); - break; - case '>': - writeChar('\\', buf); - writeChar('>', buf); - break; - case '(': - writeChar('\\', buf); - writeChar('(', buf); - break; - case ')': - writeChar('\\', buf); - writeChar(')', buf); - break; - case '#': - writeChar('\\', buf); - writeChar('#', buf); - break; - case '+': - writeChar('\\', buf); - writeChar('+', buf); - break; - case '-': - writeChar('\\', buf); - writeChar('-', buf); - break; - case '.': - writeChar('\\', buf); - writeChar('.', buf); - break; - case '!': - writeChar('\\', buf); - writeChar('!', buf); - break; - case '|': - writeChar('\\', buf); - writeChar('|', buf); - break; - default: - writeChar(*pos, buf); - } - ++pos; + case '!': + writeChar('\\', buf); + writeChar('!', buf); + break; + case '"': + writeChar('\\', buf); + writeChar('"', buf); + break; + case '#': + writeChar('\\', buf); + writeChar('#', buf); + break; + case '$': + writeChar('\\', buf); + writeChar('$', buf); + break; + case '%': + writeChar('\\', buf); + writeChar('%', buf); + break; + case '&': + writeChar('\\', buf); + writeChar('&', buf); + break; + case '\'': + writeChar('\\', buf); + writeChar('\'', buf); + break; + case '(': + writeChar('\\', buf); + writeChar('(', buf); + break; + case ')': + writeChar('\\', buf); + writeChar(')', buf); + break; + case '*': + writeChar('\\', buf); + writeChar('*', buf); + break; + case '+': + writeChar('\\', buf); + writeChar('+', buf); + break; + case ',': + writeChar('\\', buf); + writeChar(',', buf); + break; + case '-': + writeChar('\\', buf); + writeChar('-', buf); + break; + case '.': + writeChar('\\', buf); + writeChar('.', buf); + break; + case '/': + writeChar('\\', buf); + writeChar('/', buf); + break; + case ':': + writeChar('\\', buf); + writeChar(':', buf); + break; + case ';': + writeChar('\\', buf); + writeChar(';', buf); + break; + case '<': + writeChar('\\', buf); + writeChar('<', buf); + break; + case '=': + writeChar('\\', buf); + writeChar('=', buf); + break; + case '>': + writeChar('\\', buf); + writeChar('>', buf); + break; + case '?': + writeChar('\\', buf); + writeChar('?', buf); + break; + case '@': + writeChar('\\', buf); + writeChar('@', buf); + break; + case '[': + writeChar('\\', buf); + writeChar('[', buf); + break; + case '\\': + writeChar('\\', buf); + writeChar('\\', buf); + break; + case ']': + writeChar('\\', buf); + writeChar(']', buf); + break; + case '^': + writeChar('\\', buf); + writeChar('^', buf); + break; + case '_': + writeChar('\\', buf); + writeChar('_', buf); + break; + case '`': + writeChar('\\', buf); + writeChar('`', buf); + break; + case '{': + writeChar('\\', buf); + writeChar('{', buf); + break; + case '|': + writeChar('\\', buf); + writeChar('|', buf); + break; + case '}': + writeChar('\\', buf); + writeChar('}', buf); + break; + case '~': + writeChar('\\', buf); + writeChar('~', buf); + break; + default: + writeChar(*it, buf); } } } @@ -602,7 +586,7 @@ inline void writeEscapedString(std::string_view ref, WriteBuffer & buf) inline void writeMarkdownEscapedString(const char * str, size_t size, WriteBuffer & buf) { - writeAnyMarkdownEscapedString<'\''>(str, str + size, buf); + writeAnyMarkdownEscapedString(str, str + size, buf); } inline void writeMarkdownEscapedString(std::string_view ref, WriteBuffer & buf)