ClickHouse/base/base/JSON.cpp
2024-10-06 12:29:20 +02:00

819 lines
18 KiB
C++

#include <string>
#include <cstring>
#include <Poco/UTF8Encoding.h>
#include <Poco/NumberParser.h>
#include <base/JSON.h>
#include <base/find_symbols.h>
#include <base/preciseExp10.h>
#define JSON_MAX_DEPTH 100
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept)
#pragma clang diagnostic pop
/// Read unsigned integer in a simple form from a non-0-terminated string.
static UInt64 readUIntText(const char * buf, const char * end)
{
UInt64 x = 0;
if (buf == end)
throw JSONException("JSON: cannot parse unsigned integer: unexpected end of data.");
while (buf != end)
{
switch (*buf)
{
case '+':
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
x *= 10;
x += *buf - '0';
break;
default:
return x;
}
++buf;
}
return x;
}
/// Read signed integer in a simple form from a non-0-terminated string.
static Int64 readIntText(const char * buf, const char * end)
{
bool negative = false;
UInt64 x = 0;
if (buf == end)
throw JSONException("JSON: cannot parse signed integer: unexpected end of data.");
bool run = true;
while (buf != end && run)
{
switch (*buf)
{
case '+':
break;
case '-':
negative = true;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
x *= 10;
x += *buf - '0';
break;
default:
run = false;
break;
}
++buf;
}
return negative ? -x : x;
}
/// Read floating point number in simple format, imprecisely, from a non-0-terminated string.
static double readFloatText(const char * buf, const char * end)
{
bool negative = false;
double x = 0;
bool after_point = false;
double power_of_ten = 1;
if (buf == end)
throw JSONException("JSON: cannot parse floating point number: unexpected end of data.");
bool run = true;
while (buf != end && run)
{
switch (*buf)
{
case '+':
break;
case '-':
negative = true;
break;
case '.':
after_point = true;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (after_point)
{
power_of_ten /= 10;
x += (*buf - '0') * power_of_ten;
}
else
{
x *= 10;
x += *buf - '0';
}
break;
case 'e':
case 'E':
{
++buf;
auto exponent = readIntText(buf, end);
x *= preciseExp10(static_cast<double>(exponent));
run = false;
break;
}
default:
run = false;
break;
}
++buf;
}
if (negative)
x = -x;
return x;
}
void JSON::checkInit() const
{
if (!(ptr_begin < ptr_end))
throw JSONException("JSON: begin >= end.");
if (level > JSON_MAX_DEPTH)
throw JSONException("JSON: too deep.");
}
JSON::ElementType JSON::getType() const
{
switch (*ptr_begin)
{
case '{':
return TYPE_OBJECT;
case '[':
return TYPE_ARRAY;
case 't':
case 'f':
return TYPE_BOOL;
case 'n':
return TYPE_NULL;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return TYPE_NUMBER;
case '"':
{
/// Is it a string or a name-value pair?
Pos after_string = skipString();
if (after_string < ptr_end && *after_string == ':')
return TYPE_NAME_VALUE_PAIR;
return TYPE_STRING;
}
default:
throw JSONException(std::string("JSON: unexpected char ") + *ptr_begin + ", expected one of '{[tfn-0123456789\"'");
}
}
void JSON::checkPos(Pos pos) const
{
if (pos >= ptr_end || ptr_begin == nullptr)
throw JSONException("JSON: unexpected end of data.");
}
JSON::Pos JSON::skipString() const
{
Pos pos = ptr_begin;
checkPos(pos);
if (*pos != '"')
throw JSONException(std::string("JSON: expected \", got ") + *pos);
++pos;
/// fast path: find next double quote. If it is not escaped by backslash - then it's an end of string (assuming JSON is valid).
Pos closing_quote = reinterpret_cast<const char *>(memchr(reinterpret_cast<const void *>(pos), '\"', ptr_end - pos));
if (nullptr != closing_quote && closing_quote[-1] != '\\')
return closing_quote + 1;
/// slow path
while (pos < ptr_end && *pos != '"')
{
if (*pos == '\\')
{
++pos;
checkPos(pos);
if (*pos == 'u')
{
pos += 4;
checkPos(pos);
}
}
++pos;
}
checkPos(pos);
if (*pos != '"')
throw JSONException(std::string("JSON: expected \", got ") + *pos);
++pos;
return pos;
}
JSON::Pos JSON::skipNumber() const
{
Pos pos = ptr_begin;
checkPos(pos);
if (*pos == '-')
++pos;
while (pos < ptr_end && *pos >= '0' && *pos <= '9')
++pos;
if (pos < ptr_end && *pos == '.')
++pos;
while (pos < ptr_end && *pos >= '0' && *pos <= '9')
++pos;
if (pos < ptr_end && (*pos == 'e' || *pos == 'E'))
++pos;
if (pos < ptr_end && *pos == '-')
++pos;
while (pos < ptr_end && *pos >= '0' && *pos <= '9')
++pos;
return pos;
}
JSON::Pos JSON::skipBool() const
{
Pos pos = ptr_begin;
checkPos(pos);
if (*ptr_begin == 't')
pos += 4;
else if (*ptr_begin == 'f')
pos += 5;
else
throw JSONException("JSON: expected true or false.");
return pos;
}
JSON::Pos JSON::skipNull() const
{
return ptr_begin + 4;
}
JSON::Pos JSON::skipNameValuePair() const
{
Pos pos = skipString();
checkPos(pos);
if (*pos != ':')
throw JSONException("JSON: expected :.");
++pos;
return JSON(pos, ptr_end, level + 1).skipElement();
}
JSON::Pos JSON::skipArray() const
{
if (!isArray())
throw JSONException("JSON: expected [");
Pos pos = ptr_begin;
++pos;
checkPos(pos);
if (*pos == ']')
return ++pos;
while (true)
{
pos = JSON(pos, ptr_end, level + 1).skipElement();
checkPos(pos);
switch (*pos)
{
case ',':
++pos;
break;
case ']':
return ++pos;
default:
throw JSONException(std::string("JSON: expected one of ',]', got ") + *pos);
}
}
}
JSON::Pos JSON::skipObject() const
{
if (!isObject())
throw JSONException("JSON: expected {");
Pos pos = ptr_begin;
++pos;
checkPos(pos);
if (*pos == '}')
return ++pos;
while (true)
{
pos = JSON(pos, ptr_end, level + 1).skipNameValuePair();
checkPos(pos);
switch (*pos)
{
case ',':
++pos;
break;
case '}':
return ++pos;
default:
throw JSONException(std::string("JSON: expected one of ',}', got ") + *pos);
}
}
}
JSON::Pos JSON::skipElement() const
{
ElementType type = getType();
switch (type)
{
case TYPE_NULL:
return skipNull();
case TYPE_BOOL:
return skipBool();
case TYPE_NUMBER:
return skipNumber();
case TYPE_STRING:
return skipString();
case TYPE_NAME_VALUE_PAIR:
return skipNameValuePair();
case TYPE_ARRAY:
return skipArray();
case TYPE_OBJECT:
return skipObject();
default:
throw JSONException("Logical error in JSON: unknown element type: " + std::to_string(type));
}
}
size_t JSON::size() const
{
size_t i = 0;
for (const_iterator it = begin(); it != end(); ++it)
++i;
return i;
}
bool JSON::empty() const
{
return size() == 0;
}
JSON JSON::operator[] (size_t n) const
{
ElementType type = getType();
if (type != TYPE_ARRAY)
throw JSONException("JSON: not array when calling operator[](size_t) method.");
Pos pos = ptr_begin;
++pos;
checkPos(pos);
size_t i = 0;
const_iterator it = begin();
while (i < n && it != end())
{
++it;
++i;
}
if (i != n)
throw JSONException("JSON: array index " + std::to_string(n) + " out of bounds.");
return *it;
}
JSON::Pos JSON::searchField(const char * data, size_t size) const
{
ElementType type = getType();
if (type != TYPE_OBJECT)
throw JSONException("JSON: not object when calling operator[](const char *) or has(const char *) method.");
const_iterator it = begin();
for (; it != end(); ++it)
{
if (!it->hasEscapes())
{
const auto current_name = it->getRawName();
if (current_name.size() == size && 0 == memcmp(current_name.data(), data, size))
break;
}
else
{
std::string current_name = it->getName();
if (current_name.size() == size && 0 == memcmp(current_name.data(), data, size))
break;
}
}
if (it == end())
return nullptr;
return it->data();
}
bool JSON::hasEscapes() const
{
Pos pos = ptr_begin + 1;
while (pos < ptr_end && *pos != '"' && *pos != '\\')
++pos;
if (*pos == '"')
return false;
if (*pos == '\\')
return true;
throw JSONException("JSON: unexpected end of data.");
}
bool JSON::hasSpecialChars() const
{
Pos pos = ptr_begin + 1;
while (pos < ptr_end && *pos != '"'
&& *pos != '\\' && *pos != '\r' && *pos != '\n' && *pos != '\t'
&& *pos != '\f' && *pos != '\b' && *pos != '\0' && *pos != '\'')
++pos;
if (*pos == '"')
return false;
if (pos < ptr_end)
return true;
throw JSONException("JSON: unexpected end of data.");
}
JSON JSON::operator[] (const std::string & name) const
{
Pos pos = searchField(name);
if (!pos)
throw JSONException("JSON: there is no element '" + std::string(name) + "' in object.");
return JSON(pos, ptr_end, level + 1).getValue();
}
bool JSON::has(const char * data, size_t size) const
{
return nullptr != searchField(data, size);
}
double JSON::getDouble() const
{
return readFloatText(ptr_begin, ptr_end);
}
Int64 JSON::getInt() const
{
return readIntText(ptr_begin, ptr_end);
}
UInt64 JSON::getUInt() const
{
return readUIntText(ptr_begin, ptr_end);
}
bool JSON::getBool() const
{
if (*ptr_begin == 't')
return true;
if (*ptr_begin == 'f')
return false;
throw JSONException("JSON: cannot parse boolean.");
}
std::string JSON::getString() const
{
Pos s = ptr_begin;
if (*s != '"')
throw JSONException(std::string("JSON: expected \", got ") + *s);
++s;
checkPos(s);
std::string buf;
do
{
Pos p = find_first_symbols<'\\','"'>(s, ptr_end);
if (p >= ptr_end)
{
break;
}
buf.append(s, p);
s = p;
switch (*s)
{
case '\\':
++s;
checkPos(s);
switch (*s)
{
case '"':
buf += '"';
break;
case '\\':
buf += '\\';
break;
case '/':
buf += '/';
break;
case 'b':
buf += '\b';
break;
case 'f':
buf += '\f';
break;
case 'n':
buf += '\n';
break;
case 'r':
buf += '\r';
break;
case 't':
buf += '\t';
break;
case 'u':
{
Poco::UTF8Encoding utf8;
++s;
checkPos(s + 4);
std::string hex(s, 4);
s += 3;
int unicode;
try
{
unicode = Poco::NumberParser::parseHex(hex);
}
catch (const Poco::SyntaxException &)
{
throw JSONException("JSON: incorrect syntax: incorrect HEX code.");
}
buf.resize(buf.size() + 6); /// Max size of UTF-8 sequence, including pre-standard mapping of UCS-4 to UTF-8.
int res = utf8.convert(unicode,
reinterpret_cast<unsigned char *>(const_cast<char*>(buf.data())) + buf.size() - 6, 6);
if (!res)
throw JSONException("JSON: cannot convert unicode " + std::to_string(unicode)
+ " to UTF8.");
buf.resize(buf.size() - 6 + res);
break;
}
default:
buf += *s;
break;
}
++s;
break;
case '"':
return buf;
default:
throw JSONException("find_first_symbols<...>() failed in unexpected way");
}
} while (s < ptr_end);
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
}
std::string JSON::getName() const
{
return getString();
}
std::string_view JSON::getRawString() const
{
Pos s = ptr_begin;
if (*s != '"')
throw JSONException(std::string("JSON: expected \", got ") + *s);
++s;
while (s != ptr_end && *s != '"')
++s;
if (s != ptr_end)
return std::string_view(ptr_begin + 1, s - ptr_begin - 1);
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
}
std::string_view JSON::getRawName() const
{
return getRawString();
}
JSON JSON::getValue() const
{
Pos pos = skipString();
checkPos(pos);
if (*pos != ':')
throw JSONException("JSON: expected :.");
++pos;
checkPos(pos);
return JSON(pos, ptr_end, level + 1);
}
double JSON::toDouble() const
{
ElementType type = getType();
if (type == TYPE_NUMBER)
return getDouble();
if (type == TYPE_STRING)
return JSON(ptr_begin + 1, ptr_end, level + 1).getDouble();
throw JSONException("JSON: cannot convert value to double.");
}
Int64 JSON::toInt() const
{
ElementType type = getType();
if (type == TYPE_NUMBER)
return getInt();
if (type == TYPE_STRING)
return JSON(ptr_begin + 1, ptr_end, level + 1).getInt();
throw JSONException("JSON: cannot convert value to signed integer.");
}
UInt64 JSON::toUInt() const
{
ElementType type = getType();
if (type == TYPE_NUMBER)
return getUInt();
if (type == TYPE_STRING)
return JSON(ptr_begin + 1, ptr_end, level + 1).getUInt();
throw JSONException("JSON: cannot convert value to unsigned integer.");
}
std::string JSON::toString() const
{
ElementType type = getType();
if (type == TYPE_STRING)
return getString();
Pos pos = skipElement();
return std::string(ptr_begin, pos - ptr_begin);
}
JSON::iterator JSON::iterator::begin() const
{
ElementType type = getType();
if (type != TYPE_ARRAY && type != TYPE_OBJECT)
throw JSONException("JSON: not array or object when calling begin() method.");
Pos pos = ptr_begin + 1;
checkPos(pos);
if (*pos == '}' || *pos == ']')
return end();
return JSON(pos, ptr_end, level + 1);
}
JSON::iterator JSON::iterator::end() const
{
return JSON(nullptr, ptr_end, level + 1);
}
JSON::iterator & JSON::iterator::operator++()
{
Pos pos = skipElement();
checkPos(pos);
if (*pos != ',')
ptr_begin = nullptr;
else
{
++pos;
checkPos(pos);
ptr_begin = pos;
}
return *this;
}
JSON::iterator JSON::iterator::operator++(int) // NOLINT
{
iterator copy(*this);
++*this;
return copy;
}
template <>
double JSON::get<double>() const
{
return getDouble();
}
template <>
std::string JSON::get<std::string>() const
{
return getString();
}
template <>
Int64 JSON::get<Int64>() const
{
return getInt();
}
template <>
UInt64 JSON::get<UInt64>() const
{
return getUInt();
}
template <>
bool JSON::get<bool>() const
{
return getBool();
}
template <>
bool JSON::isType<std::string>() const
{
return isString();
}
template <>
bool JSON::isType<UInt64>() const
{
return isNumber();
}
template <>
bool JSON::isType<Int64>() const
{
return isNumber();
}
template <>
bool JSON::isType<bool>() const
{
return isBool();
}