mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 11:32:03 +00:00
Fancy quotes
This commit is contained in:
parent
7b1c39acbc
commit
a501887f15
@ -11,8 +11,9 @@ namespace
|
||||
{
|
||||
|
||||
/// This must be consistent with functions in ReadHelpers.h
|
||||
template <char quote, TokenType success_token, TokenType error_token>
|
||||
Token quotedString(const char *& pos, const char * const token_begin, const char * const end)
|
||||
template <char quote>
|
||||
Token quotedString(const char *& pos, const char * const token_begin, const char * const end,
|
||||
TokenType success_token, TokenType error_token)
|
||||
{
|
||||
++pos;
|
||||
while (true)
|
||||
@ -45,6 +46,37 @@ Token quotedString(const char *& pos, const char * const token_begin, const char
|
||||
}
|
||||
}
|
||||
|
||||
Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_begin, const char * const end,
|
||||
char expected_end_byte, TokenType success_token, TokenType error_token)
|
||||
{
|
||||
/// ‘: e2 80 98
|
||||
/// ’: e2 80 99
|
||||
/// “: e2 80 9c
|
||||
/// ”: e2 80 9d
|
||||
|
||||
while (true)
|
||||
{
|
||||
pos = find_first_symbols<'\xE2', '\\'>(pos, end);
|
||||
if (pos + 2 >= end)
|
||||
return Token(error_token, token_begin, end);
|
||||
|
||||
if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte)
|
||||
{
|
||||
pos += 3;
|
||||
return Token(success_token, token_begin, pos);
|
||||
}
|
||||
|
||||
if (*pos == '\\')
|
||||
{
|
||||
++pos;
|
||||
if (pos >= end)
|
||||
return Token(error_token, token_begin, end);
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Token quotedHexOrBinString(const char *& pos, const char * const token_begin, const char * const end)
|
||||
{
|
||||
constexpr char quote = '\'';
|
||||
@ -224,11 +256,11 @@ Token Lexer::nextTokenImpl()
|
||||
}
|
||||
|
||||
case '\'':
|
||||
return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end);
|
||||
return quotedString<'\''>(pos, token_begin, end, TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed);
|
||||
case '"':
|
||||
return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end);
|
||||
return quotedString<'"'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed);
|
||||
case '`':
|
||||
return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end);
|
||||
return quotedString<'`'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed);
|
||||
|
||||
case '(':
|
||||
return Token(TokenType::OpeningRoundBracket, token_begin, ++pos);
|
||||
@ -434,6 +466,16 @@ Token Lexer::nextTokenImpl()
|
||||
pos += 3;
|
||||
return Token(TokenType::Minus, token_begin, pos);
|
||||
}
|
||||
/// Unicode quoted string, ‘Hello’ or “World”.
|
||||
if (pos + 5 < end && pos[0] == '\xE2' && pos[1] == '\x80' && (pos[2] == '\x98' || pos[2] == '\x9C'))
|
||||
{
|
||||
const char expected_end_byte = pos[2] + 1;
|
||||
pos += 3;
|
||||
|
||||
TokenType success_token = pos[2] == '\x98' ? TokenType::StringLiteral : TokenType::QuotedIdentifier;
|
||||
TokenType error_token = pos[2] == '\x98' ? TokenType::ErrorSingleQuoteIsNotClosed : TokenType::ErrorDoubleQuoteIsNotClosed;
|
||||
return quotedStringWithUnicodeQuotes(pos, token_begin, end, expected_end_byte, success_token, error_token);
|
||||
}
|
||||
/// Other characters starting at E2 can be parsed, see skipWhitespacesUTF8
|
||||
[[fallthrough]];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user