// ClickHouse/src/Common/JSONParsers/SimdJSONParser.h
#pragma once

#include "config_functions.h"

#if USE_SIMDJSON
# include <cassert>
# include <cstddef>
# include <string>
# include <string_view>
# include <utility>
# include <base/types.h>
# include <Common/Exception.h>
# include <base/defines.h>
# include <simdjson.h>

namespace DB
{
/// Error codes used in this header. They are only declared here (extern);
/// the definitions are provided elsewhere.
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
}
/// This class can be used as an argument for the template class FunctionJSON.
/// It provides ability to parse JSONs using simdjson library.
struct SimdJSONParser
{
class Array;
class Object;
2019-05-13 23:44:55 +00:00
2020-07-20 17:01:58 +00:00
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
/// array or object.
class Element
2019-05-13 23:44:55 +00:00
{
public:
ALWAYS_INLINE Element() {} /// NOLINT
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
ALWAYS_INLINE bool isString() const { return element.type() == simdjson::dom::element_type::STRING; }
ALWAYS_INLINE bool isArray() const { return element.type() == simdjson::dom::element_type::ARRAY; }
ALWAYS_INLINE bool isObject() const { return element.type() == simdjson::dom::element_type::OBJECT; }
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
2021-06-25 15:33:31 +00:00
ALWAYS_INLINE simdjson::dom::element getElement() const { return element; }
Just Works Just works (remastered) First steps First steps fixed First steps first fails Research first steps Tokenizer created Sprint to the moon Rename Rename 2.0 Rename 3.0 Work in progress Update Oops Oops x2 Try this Now surely works Maybe now? Now? Cmake first try Restore to previous Cmake second try Make this work Correct mistakes Third try cmake Exclude simd Better Try Add std::cerr More std::cerr More and more std::cerr Maybe fix? A B C D E F G H I J K L M N O P AA AB AC AD AE AF AAA AAB AAC AAD AAF AAE AAF AAG AAH AAI AAJ AAK AAAA AAAB AAAC AAAD AAAE AAAF AAAG AAAH AAAAA AAAAB First try v2 First try v2.1 First try v2.2 First try v2.3 First try v2.4 First try v2.5 First try v2.6 First try v2.7 First try v2.8 First try v2.9 First try v2.10 First try v2.11 First try v2.12 First try v2.13 First try v2.14 First try v2.15 First try v2.16 First try v2.16 First try v2.17 First try v2.18 First try v2.19 First try v2.20 First try v2.21 First try v2.22 First try v2.23 First try v2.24 First try v2.25 First try v2.26 First try v2.27 First try v2.28 First try v2.29 First try v2.30 First try v2.31 First try v2.32 First try v2.33 First try v2.34 First try v2.35 First try v2.36 First try v2.37 Second try v2.00 Second try v2.01 Second try v2.02 Second try v2.03 Second try v2.04 Second try v2.05 Second try v2.06 Second try v2.07 Second try v2.08 Second try v2.09 Second try v2.10 Second try v2.11 Second try v2.12 Second try v2.13 Second try v2.14 Second try v2.15 Second try v2.16 Second try v2.17 Cleanup Link SQLJSON only in simdjson build Fix? Fix?1.1 Fix Revert "Fix" This reverts commit 9df7aa977c880ec130062bceece7e215190b4837. Revert "Fix?1.1" This reverts commit 37429ecc9003fd73c106344186e39ff6603dde6c. Revert "Fix?" This reverts commit c1236fb8f4b5a799a5564aecf81136301f226e33. Revert "Link SQLJSON only in simdjson build" This reverts commit 8795cd8b143f3cfd312ddbf1b98e10d0d6fcaf51. Revert "Cleanup" This reverts commit e100dbc545f54421276be2e5d44f99f52fe1d87c. 
Third try v2.0 Third try v2.1 Third try v2.2 Third try v2.3 Third try v2.4 Third try v2.5 Third try v2.6 Third try v2.7 Third try v2.8 Third try v2.9 Third try v2.10 Third try v2.11 Third try v2.12 Third try v2.13 Third try v2.14 Third try v2.15 Pre-intermediate touches v1.0 Pre-intermediate touches v1.1 Pre-intermediate touches v1.2 Pre-intermediate touches v1.3 Last changes
2021-03-24 19:47:28 +00:00
private:
simdjson::dom::element element;
};
2020-07-20 17:01:58 +00:00
/// References an array in a JSON document.
class Array
{
public:
class Iterator
{
public:
ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} /// NOLINT
2020-07-20 17:01:58 +00:00
ALWAYS_INLINE Element operator*() const { return *it; }
Fix style Fifth try v2.0 Fifth try v2.1 Fifth try v2.2 Fifth try v2.3 Fifth try v2.4 Fifth try v2.5 Fifth try v2.6 Fifth try v2.7 Fifth try v2.8 Fifth try v2.9 Fifth try v2.10 Fifth try v2.11 Fifth try v2.12 Fifth try v2.13 Fifth try v2.14 Fifth try v2.15 Fifth try v2.16 Fifth try v2.17 Fifth try v2.18 Fifth try v2.19 Fifth try v2.20 Fifth try v2.21 Fifth try v2.22 Fifth try v2.23 Fifth try v2.24 Fifth try v2.25 Fifth try v2.26 Fifth try v2.27 Fifth try v2.28 Add ranges Add ranges try v1.1 Add ranges try v1.2 Add ranges try v1.3 Add ranges try v1.4 Add ranges try v1.5 Add ranges try v1.6 Add ranges try v1.7 Add ranges try v1.8 Add ranges try v1.9 Add ranges try v1.10 Add ranges try v1.11 Add ranges try v1.12 Add ranges try v1.13 Add ranges try v1.14 Add ranges try v1.15 Add ranges try v1.16 Add ranges try v1.17 Add ranges try v1.18 Add ranges try v1.19 Add ranges try v1.20 Add ranges try v1.21 Add ranges try v1.22 Add ranges try v1.23 Add ranges try v1.24 Add ranges try v1.25 Add ranges try v1.26 Add ranges try v1.27 Add ranges try v1.28 Add ranges try v1.29 Add ranges try v1.30 Add ranges try v1.31 Add ranges try v1.32 Add ranges try v1.33 Add ranges try v1.34 Add ranges try v1.35 Add ranges try v1.36 Add ranges try v1.37 Add ranges try v1.38 Add ranges try v1.39 Add ranges try v1.40 Add ranges try v1.41 Add ranges try v1.42 Add ranges try v1.43 Add ranges try v1.44 Add ranges try v1.45 Add ranges try v1.46 Add ranges try v1.47 Leftover comment Try wildcard Try wildcard v1.1 Try wildcard v1.2 Try wildcard v1.3 New functions New functions 1.1 New functions 1.2 New functions 1.3 New functions 1.4 New functions 1.5 New functions 1.6 New functions 1.7 New functions 1.8 New functions 1.9 New functions 1.10 New functions 1.11 New functions 1.12 New functions 1.13 New functions 1.14 New functions 1.15 New functions 1.16 Final steps Final steps v1.1 Final steps v1.2
2021-05-15 10:10:19 +00:00
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT
2020-07-20 17:01:58 +00:00
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
private:
simdjson::dom::array::iterator it;
};
ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} /// NOLINT
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
ALWAYS_INLINE Iterator end() const { return array.end(); }
ALWAYS_INLINE size_t size() const { return array.size(); }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }
private:
simdjson::dom::array array;
};
2020-07-20 17:01:58 +00:00
using KeyValuePair = std::pair<std::string_view, Element>;
/// References an object in a JSON document.
class Object
{
public:
class Iterator
{
public:
ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} /// NOLINT
Fix style Fifth try v2.0 Fifth try v2.1 Fifth try v2.2 Fifth try v2.3 Fifth try v2.4 Fifth try v2.5 Fifth try v2.6 Fifth try v2.7 Fifth try v2.8 Fifth try v2.9 Fifth try v2.10 Fifth try v2.11 Fifth try v2.12 Fifth try v2.13 Fifth try v2.14 Fifth try v2.15 Fifth try v2.16 Fifth try v2.17 Fifth try v2.18 Fifth try v2.19 Fifth try v2.20 Fifth try v2.21 Fifth try v2.22 Fifth try v2.23 Fifth try v2.24 Fifth try v2.25 Fifth try v2.26 Fifth try v2.27 Fifth try v2.28 Add ranges Add ranges try v1.1 Add ranges try v1.2 Add ranges try v1.3 Add ranges try v1.4 Add ranges try v1.5 Add ranges try v1.6 Add ranges try v1.7 Add ranges try v1.8 Add ranges try v1.9 Add ranges try v1.10 Add ranges try v1.11 Add ranges try v1.12 Add ranges try v1.13 Add ranges try v1.14 Add ranges try v1.15 Add ranges try v1.16 Add ranges try v1.17 Add ranges try v1.18 Add ranges try v1.19 Add ranges try v1.20 Add ranges try v1.21 Add ranges try v1.22 Add ranges try v1.23 Add ranges try v1.24 Add ranges try v1.25 Add ranges try v1.26 Add ranges try v1.27 Add ranges try v1.28 Add ranges try v1.29 Add ranges try v1.30 Add ranges try v1.31 Add ranges try v1.32 Add ranges try v1.33 Add ranges try v1.34 Add ranges try v1.35 Add ranges try v1.36 Add ranges try v1.37 Add ranges try v1.38 Add ranges try v1.39 Add ranges try v1.40 Add ranges try v1.41 Add ranges try v1.42 Add ranges try v1.43 Add ranges try v1.44 Add ranges try v1.45 Add ranges try v1.46 Add ranges try v1.47 Leftover comment Try wildcard Try wildcard v1.1 Try wildcard v1.2 Try wildcard v1.3 New functions New functions 1.1 New functions 1.2 New functions 1.3 New functions 1.4 New functions 1.5 New functions 1.6 New functions 1.7 New functions 1.8 New functions 1.9 New functions 1.10 New functions 1.11 New functions 1.12 New functions 1.13 New functions 1.14 New functions 1.15 New functions 1.16 Final steps Final steps v1.1 Final steps v1.2
2021-05-15 10:10:19 +00:00
ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; }
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT
2020-07-20 17:01:58 +00:00
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
private:
simdjson::dom::object::iterator it;
};
ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} /// NOLINT
ALWAYS_INLINE Iterator begin() const { return object.begin(); }
ALWAYS_INLINE Iterator end() const { return object.end(); }
ALWAYS_INLINE size_t size() const { return object.size(); }
2020-07-20 17:01:58 +00:00
bool find(const std::string_view & key, Element & result) const
{
auto x = object.at_key(key);
if (x.error())
return false;
result = x.value_unsafe();
return true;
}
2019-05-13 23:44:55 +00:00
2020-07-20 17:01:58 +00:00
/// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t index) const
{
assert(index < size());
auto it = object.begin();
while (index--)
++it;
const auto & res = *it;
return {res.key, res.value};
}
private:
simdjson::dom::object object;
};
2020-07-20 17:01:58 +00:00
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view & json, Element & result)
2019-05-13 23:44:55 +00:00
{
auto document = parser.parse(json.data(), json.size());
if (document.error())
2019-05-13 23:44:55 +00:00
return false;
result = document.value_unsafe();
return true;
2019-05-13 23:44:55 +00:00
}
2020-07-20 17:01:58 +00:00
/// Optional: Allocates memory to parse JSON documents faster.
void reserve(size_t max_size)
{
if (parser.allocate(max_size) != simdjson::error_code::SUCCESS)
Fix style Fifth try v2.0 Fifth try v2.1 Fifth try v2.2 Fifth try v2.3 Fifth try v2.4 Fifth try v2.5 Fifth try v2.6 Fifth try v2.7 Fifth try v2.8 Fifth try v2.9 Fifth try v2.10 Fifth try v2.11 Fifth try v2.12 Fifth try v2.13 Fifth try v2.14 Fifth try v2.15 Fifth try v2.16 Fifth try v2.17 Fifth try v2.18 Fifth try v2.19 Fifth try v2.20 Fifth try v2.21 Fifth try v2.22 Fifth try v2.23 Fifth try v2.24 Fifth try v2.25 Fifth try v2.26 Fifth try v2.27 Fifth try v2.28 Add ranges Add ranges try v1.1 Add ranges try v1.2 Add ranges try v1.3 Add ranges try v1.4 Add ranges try v1.5 Add ranges try v1.6 Add ranges try v1.7 Add ranges try v1.8 Add ranges try v1.9 Add ranges try v1.10 Add ranges try v1.11 Add ranges try v1.12 Add ranges try v1.13 Add ranges try v1.14 Add ranges try v1.15 Add ranges try v1.16 Add ranges try v1.17 Add ranges try v1.18 Add ranges try v1.19 Add ranges try v1.20 Add ranges try v1.21 Add ranges try v1.22 Add ranges try v1.23 Add ranges try v1.24 Add ranges try v1.25 Add ranges try v1.26 Add ranges try v1.27 Add ranges try v1.28 Add ranges try v1.29 Add ranges try v1.30 Add ranges try v1.31 Add ranges try v1.32 Add ranges try v1.33 Add ranges try v1.34 Add ranges try v1.35 Add ranges try v1.36 Add ranges try v1.37 Add ranges try v1.38 Add ranges try v1.39 Add ranges try v1.40 Add ranges try v1.41 Add ranges try v1.42 Add ranges try v1.43 Add ranges try v1.44 Add ranges try v1.45 Add ranges try v1.46 Add ranges try v1.47 Leftover comment Try wildcard Try wildcard v1.1 Try wildcard v1.2 Try wildcard v1.3 New functions New functions 1.1 New functions 1.2 New functions 1.3 New functions 1.4 New functions 1.5 New functions 1.6 New functions 1.7 New functions 1.8 New functions 1.9 New functions 1.10 New functions 1.11 New functions 1.12 New functions 1.13 New functions 1.14 New functions 1.15 New functions 1.16 Final steps Final steps v1.1 Final steps v1.2
2021-05-15 10:10:19 +00:00
throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON",
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
2020-07-20 17:01:58 +00:00
}
2019-05-13 23:44:55 +00:00
private:
simdjson::dom::parser parser;
2019-05-13 23:44:55 +00:00
};
/// Returns this element wrapped as an Array.
/// The simdjson result is unwrapped with value_unsafe(), so its error code is not inspected here.
/// NOTE(review): presumably only meaningful when isArray() is true -- confirm at call sites.
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
    auto array_result = element.get_array();
    return array_result.value_unsafe();
}
/// Returns this element wrapped as an Object.
/// The simdjson result is unwrapped with value_unsafe(), so its error code is not inspected here.
/// NOTE(review): presumably only meaningful when isObject() is true -- confirm at call sites.
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
    auto object_result = element.get_object();
    return object_result.value_unsafe();
}
}
#endif