From 1fb4d9002d54cf098bc37d7b5c48760e58b77bda Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 20 Jul 2020 20:01:58 +0300 Subject: [PATCH] Improve JSON internal interface. --- src/Functions/DummyJSONParser.h | 38 +++++++++++++------ src/Functions/FunctionsJSON.h | 36 +++++++++--------- src/Functions/RapidJSONParser.h | 34 +++++++++++------ src/Functions/SimdJSONParser.h | 67 +++++++++++++++++++-------------- 4 files changed, 105 insertions(+), 70 deletions(-) diff --git a/src/Functions/DummyJSONParser.h b/src/Functions/DummyJSONParser.h index 9fe285731a4..7bdcf205926 100644 --- a/src/Functions/DummyJSONParser.h +++ b/src/Functions/DummyJSONParser.h @@ -17,6 +17,8 @@ struct DummyJSONParser class Array; class Object; + /// References an element in a JSON document, representing a JSON null, boolean, string, number, + /// array or object. class Element { public: @@ -39,6 +41,7 @@ struct DummyJSONParser Object getObject() const; }; + /// References an array in a JSON document. class Array { public: @@ -46,10 +49,10 @@ struct DummyJSONParser { public: Element operator*() const { return {}; } - Iterator & operator ++() { return *this; } - Iterator operator ++(int) { return *this; } - friend bool operator ==(const Iterator &, const Iterator &) { return true; } - friend bool operator !=(const Iterator &, const Iterator &) { return false; } + Iterator & operator++() { return *this; } + Iterator operator++(int) { return *this; } + friend bool operator==(const Iterator &, const Iterator &) { return true; } + friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; Iterator begin() const { return {}; } @@ -58,29 +61,40 @@ struct DummyJSONParser Element operator[](size_t) const { return {}; } }; + using KeyValuePair = std::pair; + + /// References an object in a JSON document. class Object { public: - using KeyValuePair = std::pair; - class Iterator { public: - KeyValuePair operator *() const { return {}; } - Iterator & operator ++() { return *this; } - Iterator operator ++(int) { return *this; } - friend bool operator ==(const Iterator &, const Iterator &) { return true; } - friend bool operator !=(const Iterator &, const Iterator &) { return false; } + KeyValuePair operator*() const { return {}; } + Iterator & operator++() { return *this; } + Iterator operator++(int) { return *this; } + friend bool operator==(const Iterator &, const Iterator &) { return true; } + friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; Iterator begin() const { return {}; } Iterator end() const { return {}; } size_t size() const { return 0; } - KeyValuePair operator[](size_t) const { return {}; } bool find(const std::string_view &, Element &) const { return false; } + +#if 0 + /// Optional: Provides access to an object's element by index. + KeyValuePair operator[](size_t) const { return {}; } +#endif }; + /// Parses a JSON document, returns the reference to its root element if succeeded. bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } + +#if 0 + /// Optional: Allocates memory to parse JSON documents faster. + void reserve(size_t max_size); +#endif }; } diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 15e64826d85..5d0a2924f05 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -139,6 +139,12 @@ private: BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + template + struct has_index_operator : std::false_type {}; + + template + struct has_index_operator()[0])>> : std::true_type {}; + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. @@ -217,38 +223,32 @@ private: { auto array = element.getArray(); if (index >= 0) - { --index; - if (static_cast(index) >= array.size()) - return false; - element = array[index]; - out_key = {}; - return true; - } - index += array.size(); - if (index < 0) + else + index += array.size(); + + if (static_cast(index) >= array.size()) return false; element = array[index]; out_key = {}; return true; } - if (element.isObject()) + if constexpr (has_index_operator::value) { - auto object = element.getObject(); - if (index >= 0) + if (element.isObject()) { - --index; + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + if (static_cast(index) >= object.size()) return false; std::tie(out_key, element) = object[index]; return true; } - index += object.size(); - if (index < 0) - return false; - std::tie(out_key, element) = object[index]; - return true; } return {}; diff --git a/src/Functions/RapidJSONParser.h b/src/Functions/RapidJSONParser.h index 0e64929ac5f..e4d4718abc5 100644 --- a/src/Functions/RapidJSONParser.h +++ b/src/Functions/RapidJSONParser.h @@ -20,6 +20,8 @@ struct RapidJSONParser class Array; class Object; + /// References an element in a JSON document, representing a JSON null, boolean, string, number, + /// array or object. class Element { public: @@ -47,6 +49,7 @@ struct RapidJSONParser const rapidjson::Value * ptr = nullptr; }; + /// References an array in a JSON document. class Array { public: @@ -67,17 +70,18 @@ struct RapidJSONParser ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); } ALWAYS_INLINE Iterator end() const { return ptr->End(); } ALWAYS_INLINE size_t size() const { return ptr->Size(); } - ALWAYS_INLINE Element operator[](size_t index) const { return *(ptr->Begin() + index); } + ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return *(ptr->Begin() + index); } private: const rapidjson::Value * ptr = nullptr; }; + using KeyValuePair = std::pair; + + /// References an object in a JSON document. class Object { public: - using KeyValuePair = std::pair; - class Iterator { public: @@ -96,14 +100,7 @@ struct RapidJSONParser ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); } ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); } - ALWAYS_INLINE KeyValuePair operator[](size_t index) const - { - auto it = ptr->MemberBegin() + index; - std::string_view key{it->name.GetString(), it->name.GetStringLength()}; - return KeyValuePair{key, it->value}; - } - - ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const + bool find(const std::string_view & key, Element & result) const { auto it = ptr->FindMember(rapidjson::StringRef(key.data(), key.length())); if (it == ptr->MemberEnd()) @@ -113,10 +110,20 @@ struct RapidJSONParser return true; } + /// Optional: Provides access to an object's element by index. + ALWAYS_INLINE KeyValuePair operator[](size_t index) const + { + assert (index < size()); + auto it = ptr->MemberBegin() + index; + std::string_view key{it->name.GetString(), it->name.GetStringLength()}; + return {key, it->value}; + } + private: const rapidjson::Value * ptr = nullptr; }; + /// Parses a JSON document, returns the reference to its root element if succeeded. bool parse(const std::string_view & json, Element & result) { rapidjson::MemoryStream ms(json.data(), json.size()); @@ -128,6 +135,11 @@ struct RapidJSONParser return true; } +#if 0 + /// Optional: Allocates memory to parse JSON documents faster. + void reserve(size_t max_size); +#endif + private: rapidjson::Document document; }; diff --git a/src/Functions/SimdJSONParser.h b/src/Functions/SimdJSONParser.h index 33911ad9939..30ecbce1ac5 100644 --- a/src/Functions/SimdJSONParser.h +++ b/src/Functions/SimdJSONParser.h @@ -25,6 +25,8 @@ struct SimdJSONParser class Array; class Object; + /// References an element in a JSON document, representing a JSON null, boolean, string, number, + /// array or object. class Element { public: @@ -52,6 +54,7 @@ struct SimdJSONParser simdjson::dom::element element; }; + /// References an array in a JSON document. class Array { public: @@ -59,11 +62,11 @@ struct SimdJSONParser { public: ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} - ALWAYS_INLINE Element operator *() const { return *it; } - ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } - ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; } - ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); } + ALWAYS_INLINE Element operator*() const { return *it; } + ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } + ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::array::iterator it; }; @@ -72,26 +75,27 @@ struct SimdJSONParser ALWAYS_INLINE Iterator begin() const { return array.begin(); } ALWAYS_INLINE Iterator end() const { return array.end(); } ALWAYS_INLINE size_t size() const { return array.size(); } - ALWAYS_INLINE Element operator[](size_t index) const { return array.at(index).first; } + ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; } private: simdjson::dom::array array; }; + using KeyValuePair = std::pair; + + /// References an object in a JSON document. class Object { public: - using KeyValuePair = std::pair; - class Iterator { public: ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} - ALWAYS_INLINE KeyValuePair operator *() const { const auto & res = *it; return {res.key, res.value}; } - ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } - ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; } - ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); } + ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; } + ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } + ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::object::iterator it; }; @@ -101,15 +105,7 @@ struct SimdJSONParser ALWAYS_INLINE Iterator end() const { return object.end(); } ALWAYS_INLINE size_t size() const { return object.size(); } - KeyValuePair operator [](size_t index) const - { - Iterator it = begin(); - while (index--) - ++it; - return *it; - } - - ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const + bool find(const std::string_view & key, Element & result) const { auto x = object.at_key(key); if (x.error()) @@ -119,17 +115,22 @@ struct SimdJSONParser return true; } + /// Optional: Provides access to an object's element by index. + KeyValuePair operator[](size_t index) const + { + assert(index < size()); + auto it = object.begin(); + while (index--) + ++it; + const auto & res = *it; + return {res.key, res.value}; + } + private: simdjson::dom::object object; }; - void reserve(size_t max_size) - { - if (parser.allocate(max_size) != simdjson::error_code::SUCCESS) - throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON", - ErrorCodes::CANNOT_ALLOCATE_MEMORY}; - } - + /// Parses a JSON document, returns the reference to its root element if succeeded. bool parse(const std::string_view & json, Element & result) { auto document = parser.parse(json.data(), json.size()); @@ -140,6 +141,14 @@ struct SimdJSONParser return true; } + /// Optional: Allocates memory to parse JSON documents faster. + void reserve(size_t max_size) + { + if (parser.allocate(max_size) != simdjson::error_code::SUCCESS) + throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON", + ErrorCodes::CANNOT_ALLOCATE_MEMORY}; + } + private: simdjson::dom::parser parser; };