2016-01-13 21:05:11 +00:00
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
|
#endif
|
2013-01-05 10:07:01 +00:00
|
|
|
|
|
2012-05-08 05:42:05 +00:00
|
|
|
|
#include <sstream>
|
|
|
|
|
|
2012-09-24 05:40:45 +00:00
|
|
|
|
#include <mysqlxx/Manip.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/Core/Defines.h>
|
2016-02-16 16:39:39 +00:00
|
|
|
|
#include <DB/Common/PODArray.h>
|
2016-08-16 21:23:53 +00:00
|
|
|
|
#include <DB/Common/StringUtils.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
2016-02-16 16:42:00 +00:00
|
|
|
|
#include <common/find_first_symbols.h>
|
2012-05-08 05:42:05 +00:00
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
|
/// Error codes referenced by the functions below; declared extern here, not defined.
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;   /// input did not match an expected string / symbol
    extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;          /// truncated or malformed escape sequence
    extern const int CANNOT_PARSE_QUOTED_STRING;            /// missing opening or closing quote
    extern const int INCORRECT_DATA;                        /// used when skipping JSON fields
}
|
|
|
|
|
|
2012-09-24 05:40:45 +00:00
|
|
|
|
|
|
|
|
|
static void __attribute__((__noinline__)) throwAtAssertionFailed(const char * s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
std::stringstream message;
|
|
|
|
|
message << "Cannot parse input: expected " << mysqlxx::escape << s;
|
|
|
|
|
|
|
|
|
|
if (buf.eof())
|
|
|
|
|
message << " at end of stream.";
|
|
|
|
|
else
|
|
|
|
|
message << " before: " << mysqlxx::escape << String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position()));
|
|
|
|
|
|
|
|
|
|
throw Exception(message.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-10-05 14:20:56 +00:00
|
|
|
|
bool checkString(const char * s, ReadBuffer & buf)
|
2010-06-04 18:25:25 +00:00
|
|
|
|
{
|
|
|
|
|
for (; *s; ++s)
|
|
|
|
|
{
|
|
|
|
|
if (buf.eof() || *buf.position() != *s)
|
2015-10-05 14:20:56 +00:00
|
|
|
|
return false;
|
2010-06-04 18:25:25 +00:00
|
|
|
|
++buf.position();
|
|
|
|
|
}
|
2015-10-05 14:20:56 +00:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-16 21:23:53 +00:00
|
|
|
|
|
|
|
|
|
static bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
for (; *s; ++s)
|
|
|
|
|
{
|
|
|
|
|
if (buf.eof())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
char c = *buf.position();
|
|
|
|
|
if (!(*s == c || (isAlphaASCII(*s) && alternateCaseIfAlphaASCII(*s) == c)))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
++buf.position();
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-10-05 14:20:56 +00:00
|
|
|
|
void assertString(const char * s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
if (!checkString(s, buf))
|
|
|
|
|
throwAtAssertionFailed(s, buf);
|
2010-06-04 18:25:25 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-06-03 15:32:06 +00:00
|
|
|
|
void assertChar(char symbol, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
if (buf.eof() || *buf.position() != symbol)
|
|
|
|
|
{
|
|
|
|
|
char err[2] = {symbol, '\0'};
|
|
|
|
|
throwAtAssertionFailed(err, buf);
|
|
|
|
|
}
|
|
|
|
|
++buf.position();
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-27 11:29:40 +00:00
|
|
|
|
/// Throws via throwAtAssertionFailed (with "eof" as the expected token) unless buf is at end of stream.
void assertEOF(ReadBuffer & buf)
{
    if (buf.eof())
        return;

    throwAtAssertionFailed("eof", buf);
}
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
|
|
|
|
/// Appends the byte range [begin, end) to s using its append(pointer, length) method.
template <typename T>
static void appendToStringOrVector(T & s, const char * begin, const char * end)
{
    const auto length = end - begin;
    s.append(begin, length);
}
|
|
|
|
|
|
|
|
|
|
template <>
|
2016-04-15 00:33:21 +00:00
|
|
|
|
inline void appendToStringOrVector(PaddedPODArray<UInt8> & s, const char * begin, const char * end)
|
2016-02-16 16:39:39 +00:00
|
|
|
|
{
|
2016-04-15 00:33:21 +00:00
|
|
|
|
s.insert(begin, end); /// TODO memcpySmall
|
2016-02-16 16:39:39 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readStringInto(Vector & s, ReadBuffer & buf)
|
2010-06-04 18:25:25 +00:00
|
|
|
|
{
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
|
|
|
|
size_t bytes = 0;
|
|
|
|
|
for (; buf.position() + bytes != buf.buffer().end(); ++bytes)
|
|
|
|
|
if (buf.position()[bytes] == '\t' || buf.position()[bytes] == '\n')
|
|
|
|
|
break;
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), buf.position() + bytes);
|
2010-06-04 18:25:25 +00:00
|
|
|
|
buf.position() += bytes;
|
|
|
|
|
|
2015-02-07 23:13:04 +00:00
|
|
|
|
if (buf.hasPendingData())
|
2010-06-04 18:25:25 +00:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Replaces the contents of s with the bytes read by readStringInto (up to a tab, a newline or EOF).
void readString(String & s, ReadBuffer & buf)
{
    s.clear();
    readStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readStringInto for PaddedPODArray<UInt8>.
template void readStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf)
|
|
|
|
|
{
|
2015-09-08 14:24:25 +00:00
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
|
|
|
|
size_t bytes = buf.buffer().end() - buf.position();
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), buf.position() + bytes);
|
2015-09-08 14:24:25 +00:00
|
|
|
|
buf.position() += bytes;
|
|
|
|
|
|
|
|
|
|
if (buf.hasPendingData())
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-01-05 10:07:01 +00:00
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Replaces the contents of s with everything that remains in buf.
void readStringUntilEOF(String & s, ReadBuffer & buf)
{
    s.clear();
    readStringUntilEOFInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readStringUntilEOFInto for PaddedPODArray<UInt8>.
template void readStringUntilEOFInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
|
|
|
|
|
2015-11-25 03:11:17 +00:00
|
|
|
|
/** Распарсить escape-последовательность, которая может быть простой (один символ после бэкслеша) или более сложной (несколько символов).
|
|
|
|
|
* Предполагается, что курсор расположен на символе \
|
|
|
|
|
*/
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <typename Vector>
|
|
|
|
|
static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
|
2015-11-25 03:11:17 +00:00
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
if (buf.eof())
|
|
|
|
|
throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == 'x')
|
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
/// escape-последовательность вида \xAA
|
|
|
|
|
UInt8 c1;
|
|
|
|
|
UInt8 c2;
|
|
|
|
|
readPODBinary(c1, buf);
|
|
|
|
|
readPODBinary(c2, buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
s.push_back(static_cast<char>(unhex(c1) * 16 + unhex(c2)));
|
2015-11-25 03:11:17 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Обычная escape-последовательность из одного символа.
|
2016-02-16 16:39:39 +00:00
|
|
|
|
s.push_back(parseEscapeSequence(*buf.position()));
|
2015-11-25 03:11:17 +00:00
|
|
|
|
++buf.position();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-18 11:47:37 +00:00
|
|
|
|
/// TODO Обобщить с кодом в FunctionsVisitParam.h и JSON.h
|
2016-02-18 11:44:50 +00:00
|
|
|
|
template <typename Vector>
|
|
|
|
|
static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
if (buf.eof())
|
|
|
|
|
throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
|
|
|
|
|
|
|
|
|
|
switch(*buf.position())
|
|
|
|
|
{
|
|
|
|
|
case '"':
|
|
|
|
|
s.push_back('"');
|
|
|
|
|
break;
|
|
|
|
|
case '\\':
|
|
|
|
|
s.push_back('\\');
|
|
|
|
|
break;
|
|
|
|
|
case '/':
|
|
|
|
|
s.push_back('/');
|
|
|
|
|
break;
|
|
|
|
|
case 'b':
|
|
|
|
|
s.push_back('\b');
|
|
|
|
|
break;
|
|
|
|
|
case 'f':
|
|
|
|
|
s.push_back('\f');
|
|
|
|
|
break;
|
|
|
|
|
case 'n':
|
|
|
|
|
s.push_back('\n');
|
|
|
|
|
break;
|
|
|
|
|
case 'r':
|
|
|
|
|
s.push_back('\r');
|
|
|
|
|
break;
|
|
|
|
|
case 't':
|
|
|
|
|
s.push_back('\t');
|
|
|
|
|
break;
|
|
|
|
|
case 'u':
|
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
|
|
|
|
|
char hex_code[4];
|
|
|
|
|
readPODBinary(hex_code, buf);
|
|
|
|
|
|
|
|
|
|
/// \u0000 - частый случай.
|
|
|
|
|
if (0 == memcmp(hex_code, "0000", 4))
|
|
|
|
|
{
|
|
|
|
|
s.push_back(0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
UInt16 code_point =
|
|
|
|
|
unhex(hex_code[0]) * 4096
|
|
|
|
|
+ unhex(hex_code[1]) * 256
|
|
|
|
|
+ unhex(hex_code[2]) * 16
|
|
|
|
|
+ unhex(hex_code[3]);
|
|
|
|
|
|
|
|
|
|
if (code_point <= 0x7F)
|
|
|
|
|
{
|
|
|
|
|
s.push_back(code_point);
|
|
|
|
|
}
|
|
|
|
|
else if (code_point <= 0x7FF)
|
|
|
|
|
{
|
|
|
|
|
s.push_back(((code_point >> 6) & 0x1F) | 0xC0);
|
|
|
|
|
s.push_back((code_point & 0x3F) | 0x80);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Суррогатная пара.
|
|
|
|
|
if (code_point >= 0xD800 && code_point <= 0xDBFF)
|
|
|
|
|
{
|
|
|
|
|
assertString("\\u", buf);
|
|
|
|
|
char second_hex_code[4];
|
|
|
|
|
readPODBinary(second_hex_code, buf);
|
|
|
|
|
|
|
|
|
|
UInt16 second_code_point =
|
|
|
|
|
unhex(second_hex_code[0]) * 4096
|
|
|
|
|
+ unhex(second_hex_code[1]) * 256
|
|
|
|
|
+ unhex(second_hex_code[2]) * 16
|
|
|
|
|
+ unhex(second_hex_code[3]);
|
|
|
|
|
|
|
|
|
|
if (second_code_point >= 0xDC00 && second_code_point <= 0xDFFF)
|
|
|
|
|
{
|
2016-02-18 11:54:16 +00:00
|
|
|
|
UInt32 full_code_point = 0x10000 + (code_point - 0xD800) * 1024 + (second_code_point - 0xDC00);
|
2016-02-18 11:44:50 +00:00
|
|
|
|
|
|
|
|
|
s.push_back(((full_code_point >> 18) & 0x07) | 0xF0);
|
|
|
|
|
s.push_back(((full_code_point >> 12) & 0x3F) | 0x80);
|
|
|
|
|
s.push_back(((full_code_point >> 6) & 0x3F) | 0x80);
|
|
|
|
|
s.push_back((full_code_point & 0x3F) | 0x80);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Incorrect surrogate pair of unicode escape sequences in JSON", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
s.push_back(((code_point >> 12) & 0x0F) | 0xE0);
|
|
|
|
|
s.push_back(((code_point >> 6) & 0x3F) | 0x80);
|
|
|
|
|
s.push_back((code_point & 0x3F) | 0x80);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
s.push_back(*buf.position());
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
++buf.position();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readEscapedStringInto(Vector & s, ReadBuffer & buf)
|
2010-06-04 18:25:25 +00:00
|
|
|
|
{
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
2016-02-16 21:32:24 +00:00
|
|
|
|
const char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end());
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), next_pos);
|
2013-01-05 10:07:01 +00:00
|
|
|
|
buf.position() += next_pos - buf.position();
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2015-02-07 23:13:04 +00:00
|
|
|
|
if (!buf.hasPendingData())
|
2011-12-26 02:17:33 +00:00
|
|
|
|
continue;
|
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
if (*buf.position() == '\t' || *buf.position() == '\n')
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == '\\')
|
2015-11-25 03:11:17 +00:00
|
|
|
|
parseComplexEscapeSequence(s, buf);
|
2010-06-04 18:25:25 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Replaces the contents of s with the result of readEscapedStringInto.
void readEscapedString(String & s, ReadBuffer & buf)
{
    s.clear();
    readEscapedStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readEscapedStringInto for PaddedPODArray<UInt8>.
template void readEscapedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-02-19 16:59:31 +00:00
|
|
|
|
/// Explicit instantiation of readEscapedStringInto for NullSink.
template void readEscapedStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
2011-06-15 18:54:18 +00:00
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <char quote, typename Vector>
|
|
|
|
|
static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
|
2010-06-04 18:25:25 +00:00
|
|
|
|
{
|
2011-06-15 18:54:18 +00:00
|
|
|
|
if (buf.eof() || *buf.position() != quote)
|
|
|
|
|
throw Exception("Cannot parse quoted string: expected opening quote",
|
2010-06-04 18:25:25 +00:00
|
|
|
|
ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
|
|
|
|
|
++buf.position();
|
|
|
|
|
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
2016-02-16 21:32:24 +00:00
|
|
|
|
const char * next_pos = find_first_symbols<'\\', quote>(buf.position(), buf.buffer().end());
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), next_pos);
|
2013-01-05 10:07:01 +00:00
|
|
|
|
buf.position() += next_pos - buf.position();
|
2015-02-07 23:13:04 +00:00
|
|
|
|
|
|
|
|
|
if (!buf.hasPendingData())
|
2011-12-26 02:17:33 +00:00
|
|
|
|
continue;
|
|
|
|
|
|
2011-06-15 18:54:18 +00:00
|
|
|
|
if (*buf.position() == quote)
|
2010-06-04 18:25:25 +00:00
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == '\\')
|
2015-11-25 03:11:17 +00:00
|
|
|
|
parseComplexEscapeSequence(s, buf);
|
2010-06-04 18:25:25 +00:00
|
|
|
|
}
|
|
|
|
|
|
2011-06-15 18:54:18 +00:00
|
|
|
|
throw Exception("Cannot parse quoted string: expected closing quote",
|
2010-06-04 18:25:25 +00:00
|
|
|
|
ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readQuotedStringInto(Vector & s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
readAnyQuotedStringInto<'\''>(s, buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
readAnyQuotedStringInto<'"'>(s, buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readBackQuotedStringInto(Vector & s, ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
readAnyQuotedStringInto<'`'>(s, buf);
|
|
|
|
|
}
|
|
|
|
|
|
2011-06-15 18:54:18 +00:00
|
|
|
|
|
|
|
|
|
/// Replaces the contents of s with a single-quoted string read from buf.
void readQuotedString(String & s, ReadBuffer & buf)
{
    s.clear();
    readQuotedStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readQuotedStringInto for PaddedPODArray<UInt8>.
template void readQuotedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-09-20 19:11:25 +00:00
|
|
|
|
/// Explicit instantiation of readDoubleQuotedStringInto for NullSink (template argument deduced from the parameter).
template void readDoubleQuotedStringInto(NullSink & s, ReadBuffer & buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
2011-06-15 18:54:18 +00:00
|
|
|
|
/// Replaces the contents of s with a double-quoted string read from buf.
void readDoubleQuotedString(String & s, ReadBuffer & buf)
{
    s.clear();
    readDoubleQuotedStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readDoubleQuotedStringInto for PaddedPODArray<UInt8>.
template void readDoubleQuotedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
2011-11-01 17:57:37 +00:00
|
|
|
|
/// Replaces the contents of s with a back-quoted string read from buf.
void readBackQuotedString(String & s, ReadBuffer & buf)
{
    s.clear();
    readBackQuotedStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readBackQuotedStringInto for PaddedPODArray<UInt8>.
template void readBackQuotedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2012-05-08 05:42:05 +00:00
|
|
|
|
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <typename Vector>
|
|
|
|
|
void readCSVStringInto(Vector & s, ReadBuffer & buf, const char delimiter)
|
|
|
|
|
{
|
2016-02-07 08:42:21 +00:00
|
|
|
|
if (buf.eof())
|
|
|
|
|
throwReadAfterEOF();
|
|
|
|
|
|
|
|
|
|
char maybe_quote = *buf.position();
|
|
|
|
|
|
|
|
|
|
/// Пустота и даже не в кавычках.
|
|
|
|
|
if (maybe_quote == delimiter)
|
|
|
|
|
return;
|
|
|
|
|
|
2016-02-07 11:49:49 +00:00
|
|
|
|
if (maybe_quote == '\'' || maybe_quote == '"')
|
2016-02-07 08:42:21 +00:00
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
|
|
|
|
|
/// Закавыченный случай. Ищем следующую кавычку.
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
|
|
|
|
const char * next_pos = reinterpret_cast<const char *>(memchr(buf.position(), maybe_quote, buf.buffer().end() - buf.position()));
|
|
|
|
|
|
|
|
|
|
if (nullptr == next_pos)
|
|
|
|
|
next_pos = buf.buffer().end();
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), next_pos);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
buf.position() += next_pos - buf.position();
|
|
|
|
|
|
|
|
|
|
if (!buf.hasPendingData())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/// Сейчас под курсором кавычка. Есть ли следующая?
|
|
|
|
|
++buf.position();
|
|
|
|
|
if (buf.eof())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == maybe_quote)
|
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
s.push_back(maybe_quote);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
++buf.position();
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Незакавыченный случай. Ищем delimiter или \r или \n.
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
|
|
|
|
const char * next_pos = buf.position();
|
|
|
|
|
while (next_pos < buf.buffer().end()
|
|
|
|
|
&& *next_pos != delimiter && *next_pos != '\r' && *next_pos != '\n') /// NOTE Можно сделать SIMD версию.
|
|
|
|
|
++next_pos;
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
appendToStringOrVector(s, buf.position(), next_pos);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
buf.position() += next_pos - buf.position();
|
|
|
|
|
|
|
|
|
|
if (!buf.hasPendingData())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/** CSV формат может содержать незначащие пробелы и табы.
|
|
|
|
|
* Обычно задача их пропускать - у вызывающего кода.
|
|
|
|
|
* Но в данном случае, сделать это будет сложно, поэтому удаляем концевые пробельные символы самостоятельно.
|
|
|
|
|
*/
|
|
|
|
|
size_t size = s.size();
|
|
|
|
|
while (size > 0
|
|
|
|
|
&& (s[size - 1] == ' ' || s[size - 1] == '\t'))
|
|
|
|
|
--size;
|
|
|
|
|
|
|
|
|
|
s.resize(size);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Replaces the contents of s with one CSV value read from buf using the given delimiter.
void readCSVString(String & s, ReadBuffer & buf, const char delimiter)
{
    s.clear();
    readCSVStringInto<String>(s, buf, delimiter);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readCSVStringInto for PaddedPODArray<UInt8>.
template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const char delimiter);
|
2016-02-16 16:39:39 +00:00
|
|
|
|
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-02-18 11:44:50 +00:00
|
|
|
|
template <typename Vector>
|
2016-03-07 04:31:10 +00:00
|
|
|
|
void readJSONStringInto(Vector & s, ReadBuffer & buf)
|
2016-02-18 11:44:50 +00:00
|
|
|
|
{
|
|
|
|
|
if (buf.eof() || *buf.position() != '"')
|
|
|
|
|
throw Exception("Cannot parse JSON string: expected opening quote",
|
|
|
|
|
ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
|
|
|
|
|
++buf.position();
|
|
|
|
|
|
|
|
|
|
while (!buf.eof())
|
|
|
|
|
{
|
|
|
|
|
const char * next_pos = find_first_symbols<'\\', '"'>(buf.position(), buf.buffer().end());
|
|
|
|
|
|
|
|
|
|
appendToStringOrVector(s, buf.position(), next_pos);
|
|
|
|
|
buf.position() += next_pos - buf.position();
|
|
|
|
|
|
|
|
|
|
if (!buf.hasPendingData())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == '"')
|
|
|
|
|
{
|
|
|
|
|
++buf.position();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*buf.position() == '\\')
|
|
|
|
|
parseJSONEscapeSequence(s, buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw Exception("Cannot parse JSON string: expected closing quote",
|
|
|
|
|
ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Replaces the contents of s with a JSON string read from buf.
void readJSONString(String & s, ReadBuffer & buf)
{
    s.clear();
    readJSONStringInto<String>(s, buf);
}
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
/// Explicit instantiation of readJSONStringInto for PaddedPODArray<UInt8>.
template void readJSONStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
2016-09-20 19:11:25 +00:00
|
|
|
|
/// Explicit instantiation of readJSONStringInto for NullSink.
template void readJSONStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
|
2016-02-18 11:44:50 +00:00
|
|
|
|
|
|
|
|
|
|
2015-04-01 02:55:52 +00:00
|
|
|
|
void readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf)
|
|
|
|
|
{
|
2015-10-21 19:04:02 +00:00
|
|
|
|
static constexpr auto DATE_TIME_BROKEN_DOWN_LENGTH = 19;
|
|
|
|
|
static constexpr auto UNIX_TIMESTAMP_MAX_LENGTH = 10;
|
2015-04-01 02:55:52 +00:00
|
|
|
|
|
2015-10-21 19:04:02 +00:00
|
|
|
|
char s[DATE_TIME_BROKEN_DOWN_LENGTH];
|
|
|
|
|
char * s_pos = s;
|
|
|
|
|
|
|
|
|
|
/// Кусок, похожий на unix timestamp.
|
2016-08-16 21:23:53 +00:00
|
|
|
|
while (s_pos < s + UNIX_TIMESTAMP_MAX_LENGTH && !buf.eof() && isNumericASCII(*buf.position()))
|
2015-04-01 02:55:52 +00:00
|
|
|
|
{
|
2015-10-21 19:04:02 +00:00
|
|
|
|
*s_pos = *buf.position();
|
|
|
|
|
++s_pos;
|
|
|
|
|
++buf.position();
|
2015-04-01 02:55:52 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-10-21 19:04:02 +00:00
|
|
|
|
/// 2015-01-01 01:02:03
|
|
|
|
|
if (s_pos == s + 4 && !buf.eof() && (*buf.position() < '0' || *buf.position() > '9'))
|
2015-04-01 02:55:52 +00:00
|
|
|
|
{
|
2015-10-21 19:04:02 +00:00
|
|
|
|
const size_t remaining_size = DATE_TIME_BROKEN_DOWN_LENGTH - (s_pos - s);
|
|
|
|
|
size_t size = buf.read(s_pos, remaining_size);
|
|
|
|
|
if (remaining_size != size)
|
2015-04-01 02:55:52 +00:00
|
|
|
|
{
|
2015-10-21 19:04:02 +00:00
|
|
|
|
s_pos[size] = 0;
|
2015-04-01 02:55:52 +00:00
|
|
|
|
throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
|
|
|
|
|
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
|
|
|
|
|
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
|
|
|
|
|
|
|
|
|
|
UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0');
|
|
|
|
|
UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0');
|
|
|
|
|
UInt8 second = (s[17] - '0') * 10 + (s[18] - '0');
|
|
|
|
|
|
|
|
|
|
if (unlikely(year == 0))
|
|
|
|
|
datetime = 0;
|
|
|
|
|
else
|
|
|
|
|
datetime = DateLUT::instance().makeDateTime(year, month, day, hour, minute, second);
|
|
|
|
|
}
|
|
|
|
|
else
|
2015-10-21 19:04:02 +00:00
|
|
|
|
datetime = parse<time_t>(s, s_pos - s);
|
2015-04-01 02:55:52 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-09-20 19:11:25 +00:00
|
|
|
|
/** Skips one JSON value located at the cursor without storing it.
  * Supported values: a double-quoted string, a number, null, true, false,
  * and an array of such values (elements are skipped recursively).
  * Nested objects are not supported.
  *
  * name_of_filed is only used in exception messages.
  *
  * Throws Exception with code INCORRECT_DATA on unexpected EOF, on a number that cannot be parsed,
  * on a nested object and on any other unexpected symbol. The string and literal branches may
  * additionally throw whatever readJSONStringInto / assertString throw.
  */
void skipJSONFieldPlain(ReadBuffer & buf, const String & name_of_filed)
{
    if (buf.eof())
        throw Exception("Unexpected EOF for key '" + name_of_filed + "'", ErrorCodes::INCORRECT_DATA);

    const char first = *buf.position();

    if (first == '"') /// skip double-quoted string
    {
        NullSink sink;
        readJSONStringInto(sink, buf);
    }
    /// isNumericASCII instead of isdigit: passing a plain (possibly negative) char to isdigit is
    /// undefined behaviour for bytes >= 0x80. A leading '-' starts a valid JSON number too.
    /// NOTE(review): assumes tryReadFloatText handles a leading minus sign - confirm; if it does not,
    /// the value is still rejected with INCORRECT_DATA, only with the "number field" message.
    else if (isNumericASCII(first) || first == '-') /// skip number
    {
        double v;
        if (!tryReadFloatText(v, buf))
            throw Exception("Expected a number field for key '" + name_of_filed + "'", ErrorCodes::INCORRECT_DATA);
    }
    else if (first == 'n') /// skip null
    {
        assertString("null", buf);
    }
    else if (first == 't') /// skip true
    {
        assertString("true", buf);
    }
    else if (first == 'f') /// skip false
    {
        assertString("false", buf);
    }
    else if (first == '[') /// skip array
    {
        ++buf.position();
        skipWhitespaceIfAny(buf);

        if (!buf.eof() && *buf.position() == ']') /// skip empty array
        {
            ++buf.position();
            return;
        }

        while (true)
        {
            /// Each element is a plain field itself.
            skipJSONFieldPlain(buf, name_of_filed);
            skipWhitespaceIfAny(buf);

            if (!buf.eof() && *buf.position() == ',')
            {
                ++buf.position();
                skipWhitespaceIfAny(buf);
            }
            else if (!buf.eof() && *buf.position() == ']')
            {
                ++buf.position();
                break;
            }
            else
                throw Exception("Unexpected symbol for key '" + name_of_filed + "'", ErrorCodes::INCORRECT_DATA);
        }
    }
    else if (first == '{') /// fail on objects
    {
        throw Exception("Unexpected nested field for key '" + name_of_filed + "'", ErrorCodes::INCORRECT_DATA);
    }
    else
    {
        throw Exception("Unexpected symbol for key '" + name_of_filed + "'", ErrorCodes::INCORRECT_DATA);
    }
}
|
|
|
|
|
|
|
|
|
|
|
2012-05-08 05:42:05 +00:00
|
|
|
|
/** Reads an exception from buf in binary form (code, name, message, stack trace, nested flag)
  * and assigns it to e. If the nested flag is set, the nested exception is read recursively.
  * The resulting message is: additional_message (if not empty), then the name (unless it is
  * "DB::Exception"), then the message and the stack trace.
  */
void readException(Exception & e, ReadBuffer & buf, const String & additional_message)
{
    int code = 0;
    String name;
    String message;
    String stack_trace;
    bool has_nested = false;

    /// The fields are read in this fixed order.
    readBinary(code, buf);
    readBinary(name, buf);
    readBinary(message, buf);
    readBinary(stack_trace, buf);
    readBinary(has_nested, buf);

    std::stringstream message_stream;

    if (!additional_message.empty())
        message_stream << additional_message << ". ";

    if (name != "DB::Exception")
        message_stream << name << ". ";

    message_stream << message << ". Stack trace:\n\n" << stack_trace;

    if (!has_nested)
    {
        e = Exception(message_stream.str(), code);
        return;
    }

    Exception nested;
    readException(nested, buf);
    e = Exception(message_stream.str(), nested, code);
}
|
|
|
|
|
|
|
|
|
|
/// Reads an exception from buf with readException and rethrows it.
void readAndThrowException(ReadBuffer & buf, const String & additional_message)
{
    Exception received;
    readException(received, buf, additional_message);
    received.rethrow();
}
|
|
|
|
|
|
2016-08-16 21:23:53 +00:00
|
|
|
|
|
|
|
|
|
/** Must successfully parse inf, INF and Infinity.
|
|
|
|
|
* All other variants in different cases are also parsed for simplicity.
|
|
|
|
|
*/
|
|
|
|
|
bool parseInfinity(ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
if (!checkStringCaseInsensitive("inf", buf))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/// Just inf.
|
|
|
|
|
if (buf.eof() || !isWordCharASCII(*buf.position()))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/// If word characters after inf, it should be infinity.
|
|
|
|
|
return checkStringCaseInsensitive("inity", buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Must successfully parse nan, NAN and NaN.
|
|
|
|
|
* All other variants in different cases are also parsed for simplicity.
|
|
|
|
|
*/
|
|
|
|
|
bool parseNaN(ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
return checkStringCaseInsensitive("nan", buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Parses infinity with parseInfinity; throws CANNOT_PARSE_INPUT_ASSERTION_FAILED if it fails.
void assertInfinity(ReadBuffer & buf)
{
    if (parseInfinity(buf))
        return;

    throw Exception("Cannot parse infinity.", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
}
|
|
|
|
|
|
|
|
|
|
/// Parses NaN with parseNaN; throws CANNOT_PARSE_INPUT_ASSERTION_FAILED if it fails.
void assertNaN(ReadBuffer & buf)
{
    if (parseNaN(buf))
        return;

    throw Exception("Cannot parse NaN.", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
}
|
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
}
|