2019-05-13 23:44:55 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_functions.h"
|
2019-05-13 23:44:55 +00:00
|
|
|
|
2020-04-16 12:31:57 +00:00
|
|
|
#if USE_SIMDJSON
|
2021-10-02 07:13:14 +00:00
|
|
|
# include <base/types.h>
|
2020-04-16 12:31:57 +00:00
|
|
|
# include <Common/Exception.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
# include <base/defines.h>
|
2021-05-15 10:10:19 +00:00
|
|
|
# include <simdjson.h>
|
2019-05-13 23:44:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes used by this header. Only declared here; the definitions are provided elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_ALLOCATE_MEMORY;
}
|
|
|
|
|
|
|
|
/// This class can be used as an argument for the template class FunctionJSON.
|
|
|
|
/// It provides the ability to parse JSON documents using the simdjson library.
|
|
|
|
struct SimdJSONParser
|
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
class Array;
|
|
|
|
class Object;
|
2019-05-13 23:44:55 +00:00
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
|
|
|
|
/// array or object.
|
2020-07-11 21:04:22 +00:00
|
|
|
class Element
|
2019-05-13 23:44:55 +00:00
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
public:
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Element() {} /// NOLINT
|
|
|
|
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT
|
2020-07-11 21:04:22 +00:00
|
|
|
|
|
|
|
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
|
|
|
|
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
|
|
|
|
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
|
|
|
|
ALWAYS_INLINE bool isString() const { return element.type() == simdjson::dom::element_type::STRING; }
|
|
|
|
ALWAYS_INLINE bool isArray() const { return element.type() == simdjson::dom::element_type::ARRAY; }
|
|
|
|
ALWAYS_INLINE bool isObject() const { return element.type() == simdjson::dom::element_type::OBJECT; }
|
|
|
|
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
|
|
|
|
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
|
|
|
|
|
2021-03-23 17:58:30 +00:00
|
|
|
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
|
|
|
|
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
|
|
|
|
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
|
|
|
|
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
|
|
|
|
ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
|
2020-07-11 21:04:22 +00:00
|
|
|
ALWAYS_INLINE Array getArray() const;
|
|
|
|
ALWAYS_INLINE Object getObject() const;
|
|
|
|
|
2021-06-25 15:33:31 +00:00
|
|
|
ALWAYS_INLINE simdjson::dom::element getElement() const { return element; }
|
2021-03-24 19:47:28 +00:00
|
|
|
|
2020-07-11 21:04:22 +00:00
|
|
|
private:
|
|
|
|
simdjson::dom::element element;
|
|
|
|
};
|
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// References an array in a JSON document.
|
2020-07-11 21:04:22 +00:00
|
|
|
class Array
|
2019-05-17 14:21:37 +00:00
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
public:
|
|
|
|
class Iterator
|
2019-05-17 14:21:37 +00:00
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
public:
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} /// NOLINT
|
2020-07-20 17:01:58 +00:00
|
|
|
ALWAYS_INLINE Element operator*() const { return *it; }
|
2021-05-15 10:10:19 +00:00
|
|
|
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT
|
2020-07-20 17:01:58 +00:00
|
|
|
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
|
|
|
|
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
|
2020-07-11 21:04:22 +00:00
|
|
|
private:
|
|
|
|
simdjson::dom::array::iterator it;
|
|
|
|
};
|
|
|
|
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} /// NOLINT
|
2020-07-11 21:04:22 +00:00
|
|
|
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
|
|
|
|
ALWAYS_INLINE Iterator end() const { return array.end(); }
|
|
|
|
ALWAYS_INLINE size_t size() const { return array.size(); }
|
2021-03-23 17:58:30 +00:00
|
|
|
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }
|
2020-07-11 21:04:22 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
simdjson::dom::array array;
|
|
|
|
};
|
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// A single member of a JSON object: its key together with the element stored under that key.
using KeyValuePair = std::pair<std::string_view, Element>;
|
|
|
|
|
|
|
|
/// References an object in a JSON document.
|
2020-07-11 21:04:22 +00:00
|
|
|
class Object
|
2019-05-17 14:21:37 +00:00
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
public:
|
|
|
|
class Iterator
|
|
|
|
{
|
|
|
|
public:
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} /// NOLINT
|
2021-05-15 10:10:19 +00:00
|
|
|
ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; }
|
|
|
|
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT
|
2020-07-20 17:01:58 +00:00
|
|
|
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
|
|
|
|
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
|
2020-07-11 21:04:22 +00:00
|
|
|
private:
|
|
|
|
simdjson::dom::object::iterator it;
|
|
|
|
};
|
|
|
|
|
2022-03-12 18:05:50 +00:00
|
|
|
ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} /// NOLINT
|
2020-07-11 21:04:22 +00:00
|
|
|
ALWAYS_INLINE Iterator begin() const { return object.begin(); }
|
|
|
|
ALWAYS_INLINE Iterator end() const { return object.end(); }
|
|
|
|
ALWAYS_INLINE size_t size() const { return object.size(); }
|
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
bool find(const std::string_view & key, Element & result) const
|
2020-07-11 21:04:22 +00:00
|
|
|
{
|
|
|
|
auto x = object.at_key(key);
|
|
|
|
if (x.error())
|
|
|
|
return false;
|
2019-05-17 14:21:37 +00:00
|
|
|
|
2021-03-23 17:58:30 +00:00
|
|
|
result = x.value_unsafe();
|
2020-07-11 21:04:22 +00:00
|
|
|
return true;
|
|
|
|
}
|
2019-05-13 23:44:55 +00:00
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// Optional: Provides access to an object's element by index.
|
|
|
|
KeyValuePair operator[](size_t index) const
|
|
|
|
{
|
|
|
|
assert(index < size());
|
|
|
|
auto it = object.begin();
|
|
|
|
while (index--)
|
|
|
|
++it;
|
|
|
|
const auto & res = *it;
|
|
|
|
return {res.key, res.value};
|
|
|
|
}
|
|
|
|
|
2020-07-11 21:04:22 +00:00
|
|
|
private:
|
|
|
|
simdjson::dom::object object;
|
|
|
|
};
|
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// Parses a JSON document, returns the reference to its root element if succeeded.
|
2020-07-11 21:04:22 +00:00
|
|
|
bool parse(const std::string_view & json, Element & result)
|
2019-05-13 23:44:55 +00:00
|
|
|
{
|
2020-07-11 21:04:22 +00:00
|
|
|
auto document = parser.parse(json.data(), json.size());
|
|
|
|
if (document.error())
|
2019-05-13 23:44:55 +00:00
|
|
|
return false;
|
2019-05-17 14:21:37 +00:00
|
|
|
|
2021-03-23 17:58:30 +00:00
|
|
|
result = document.value_unsafe();
|
2020-07-11 21:04:22 +00:00
|
|
|
return true;
|
2019-05-13 23:44:55 +00:00
|
|
|
}
|
|
|
|
|
2020-07-20 17:01:58 +00:00
|
|
|
/// Optional: Allocates memory to parse JSON documents faster.
|
|
|
|
void reserve(size_t max_size)
|
|
|
|
{
|
|
|
|
if (parser.allocate(max_size) != simdjson::error_code::SUCCESS)
|
2021-05-15 10:10:19 +00:00
|
|
|
throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON",
|
|
|
|
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
|
2020-07-20 17:01:58 +00:00
|
|
|
}
|
|
|
|
|
2019-05-13 23:44:55 +00:00
|
|
|
private:
|
2020-07-11 21:04:22 +00:00
|
|
|
/// The simdjson DOM parser that parse() and reserve() operate on.
simdjson::dom::parser parser;
|
2019-05-13 23:44:55 +00:00
|
|
|
};
|
|
|
|
|
2020-07-11 21:04:22 +00:00
|
|
|
/// Returns the wrapped element as an array.
/// Defined out of class because Array is only forward-declared at the point where Element is defined.
/// The simdjson result is unwrapped without an error check, so the caller is expected to check isArray() first.
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
    auto as_array = element.get_array();
    return Array(as_array.value_unsafe());
}
|
|
|
|
|
|
|
|
/// Returns the wrapped element as an object.
/// Defined out of class because Object is only forward-declared at the point where Element is defined.
/// The simdjson result is unwrapped without an error check, so the caller is expected to check isObject() first.
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
    auto as_object = element.get_object();
    return Object(as_object.value_unsafe());
}
|
|
|
|
|
2019-05-13 23:44:55 +00:00
|
|
|
}
|
2020-04-16 12:31:57 +00:00
|
|
|
|
2019-05-13 23:44:55 +00:00
|
|
|
#endif
|