mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-02 12:32:04 +00:00
Fancy quotes
This commit is contained in:
parent
7b1c39acbc
commit
a501887f15
@ -11,8 +11,9 @@ namespace
|
|||||||
{
|
{
|
||||||
|
|
||||||
/// This must be consistent with functions in ReadHelpers.h
|
/// This must be consistent with functions in ReadHelpers.h
|
||||||
template <char quote, TokenType success_token, TokenType error_token>
|
template <char quote>
|
||||||
Token quotedString(const char *& pos, const char * const token_begin, const char * const end)
|
Token quotedString(const char *& pos, const char * const token_begin, const char * const end,
|
||||||
|
TokenType success_token, TokenType error_token)
|
||||||
{
|
{
|
||||||
++pos;
|
++pos;
|
||||||
while (true)
|
while (true)
|
||||||
@ -45,6 +46,37 @@ Token quotedString(const char *& pos, const char * const token_begin, const char
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lexes the remainder of a string or identifier that was opened with a Unicode "fancy" quote.
///
/// On entry `pos` points just past the three-byte opening quote. The function scans forward for
/// the matching closing quote, whose UTF-8 encoding is `E2 80 <expected_end_byte>`:
///   ‘: e2 80 98
///   ’: e2 80 99
///   “: e2 80 9c
///   ”: e2 80 9d
///
/// A backslash escapes the single byte that follows it, so an escaped closing quote does not
/// terminate the literal.
///
/// Returns a token of type `success_token` spanning [token_begin, pos) with `pos` placed right
/// after the closing quote, or a token of type `error_token` spanning [token_begin, end) when
/// the closing quote is not found.
Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_begin, const char * const end,
    char expected_end_byte, TokenType success_token, TokenType error_token)
{
    while (true)
    {
        /// Jump to the next byte that can matter: the lead byte of a quote or an escape.
        /// NOTE(review): presumably find_first_symbols returns `end` when nothing is found - confirm.
        pos = find_first_symbols<'\xE2', '\\'>(pos, end);

        /// A closing quote needs three bytes; with fewer left the literal cannot be terminated.
        /// Written as a difference so that no pointer beyond one-past-the-end is ever formed.
        if (end - pos < 3)
            return Token(error_token, token_begin, end);

        if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte)
        {
            pos += 3;
            return Token(success_token, token_begin, pos);
        }

        if (*pos == '\\')
        {
            /// Skip the backslash and the byte it escapes.
            ++pos;
            if (pos >= end)
                return Token(error_token, token_begin, end);
            ++pos;
            continue;
        }

        /// An 0xE2 byte that does not start the expected closing quote (some other character
        /// whose UTF-8 encoding begins with E2). Step over it; otherwise the next search would
        /// stop at this very byte again and the loop would never terminate.
        ++pos;
    }
}
|
||||||
|
|
||||||
Token quotedHexOrBinString(const char *& pos, const char * const token_begin, const char * const end)
|
Token quotedHexOrBinString(const char *& pos, const char * const token_begin, const char * const end)
|
||||||
{
|
{
|
||||||
constexpr char quote = '\'';
|
constexpr char quote = '\'';
|
||||||
@ -224,11 +256,11 @@ Token Lexer::nextTokenImpl()
|
|||||||
}
|
}
|
||||||
|
|
||||||
case '\'':
|
case '\'':
|
||||||
return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end);
|
return quotedString<'\''>(pos, token_begin, end, TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed);
|
||||||
case '"':
|
case '"':
|
||||||
return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end);
|
return quotedString<'"'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed);
|
||||||
case '`':
|
case '`':
|
||||||
return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end);
|
return quotedString<'`'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed);
|
||||||
|
|
||||||
case '(':
|
case '(':
|
||||||
return Token(TokenType::OpeningRoundBracket, token_begin, ++pos);
|
return Token(TokenType::OpeningRoundBracket, token_begin, ++pos);
|
||||||
@ -434,6 +466,16 @@ Token Lexer::nextTokenImpl()
|
|||||||
pos += 3;
|
pos += 3;
|
||||||
return Token(TokenType::Minus, token_begin, pos);
|
return Token(TokenType::Minus, token_begin, pos);
|
||||||
}
|
}
|
||||||
|
/// Unicode quoted string, ‘Hello’ or “World”.
|
||||||
|
if (pos + 5 < end && pos[0] == '\xE2' && pos[1] == '\x80' && (pos[2] == '\x98' || pos[2] == '\x9C'))
{
    /// The third byte of the opening quote tells the two flavours apart:
    /// 0x98 opens a single-quoted string literal, 0x9C opens a double-quoted identifier.
    /// It must be inspected BEFORE `pos` is advanced; afterwards pos[2] is a byte of the
    /// quoted content and would select the token types at random.
    const char opening_byte = pos[2];
    const bool is_single_quote = opening_byte == '\x98';

    /// The closing quote differs from the opening one only in the last byte, which is one greater.
    const char expected_end_byte = static_cast<char>(opening_byte + 1);

    const TokenType success_token = is_single_quote ? TokenType::StringLiteral : TokenType::QuotedIdentifier;
    const TokenType error_token = is_single_quote ? TokenType::ErrorSingleQuoteIsNotClosed : TokenType::ErrorDoubleQuoteIsNotClosed;

    /// Step over the opening quote; the helper scans from the first content byte.
    pos += 3;
    return quotedStringWithUnicodeQuotes(pos, token_begin, end, expected_end_byte, success_token, error_token);
}
|
||||||
/// Other characters starting at E2 can be parsed, see skipWhitespacesUTF8
|
/// Other characters starting at E2 can be parsed, see skipWhitespacesUTF8
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user