Add rapidjson as a bundled JSON parser and register the JSON functions with it.

Review notes:
  * cmake/find_rapidjson.cmake and dbms/src/Functions/RapidJSONParser.h no longer match
    the blobs named in their "index" lines, so those abbreviated hashes are stale.
  * The #include targets, the template arguments of registerFunctionsJSONTemplate and
    the indentation of context lines were missing from the reviewed text and have been
    filled in; confirm them against the tree before applying.

diff --git a/.gitmodules b/.gitmodules
index e19f56837da..0fda654f07c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -82,3 +82,6 @@
 [submodule "contrib/simdjson"]
 	path = contrib/simdjson
 	url = https://github.com/lemire/simdjson.git
+[submodule "contrib/rapidjson"]
+	path = contrib/rapidjson
+	url = https://github.com/Tencent/rapidjson
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 168fdf7e28d..79b3b1ddba3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -328,6 +328,7 @@ include (cmake/find_base64.cmake)
 include (cmake/find_hyperscan.cmake)
 include (cmake/find_lfalloc.cmake)
 include (cmake/find_simdjson.cmake)
+include (cmake/find_rapidjson.cmake)
 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
 find_contrib_lib(metrohash)
diff --git a/cmake/find_rapidjson.cmake b/cmake/find_rapidjson.cmake
new file mode 100644
index 00000000000..bd8f0fbb449
--- /dev/null
+++ b/cmake/find_rapidjson.cmake
@@ -0,0 +1,13 @@
+# Locates the bundled rapidjson submodule.
+# Sets USE_RAPIDJSON and, when the submodule is present, RAPIDJSON_INCLUDE_DIR.
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
+    message (WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive")
+    # Override a possibly cached ON value so nothing tries to use the missing headers.
+    set (USE_RAPIDJSON 0)
+    return()
+endif ()
+
+option (USE_RAPIDJSON "Use rapidjson" ON)
+set (RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
+
+message(STATUS "Using rapidjson=${USE_RAPIDJSON}: ${RAPIDJSON_INCLUDE_DIR}")
diff --git a/contrib/rapidjson b/contrib/rapidjson
new file mode 160000
index 00000000000..01950eb7ace
--- /dev/null
+++ b/contrib/rapidjson
@@ -0,0 +1 @@
+Subproject commit 01950eb7acec78818d68b762efc869bba2420d82
diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in
index a1d2074686c..fb45c88a94d 100644
--- a/dbms/src/Common/config.h.in
+++ b/dbms/src/Common/config.h.in
@@ -26,6 +26,7 @@
 #cmakedefine01 USE_SSL
 #cmakedefine01 USE_HYPERSCAN
 #cmakedefine01 USE_SIMDJSON
+#cmakedefine01 USE_RAPIDJSON
 #cmakedefine01 USE_LFALLOC
 #cmakedefine01 USE_LFALLOC_RANDOM_HINT
 
diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt
index 00959a755dd..f83c751b55b 100644
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@@ -73,3 +73,7 @@ endif()
 if(USE_SIMDJSON)
     target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
 endif()
+
+if(USE_RAPIDJSON)
+    target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
+endif()
diff --git a/dbms/src/Functions/FunctionsJSON.cpp b/dbms/src/Functions/FunctionsJSON.cpp
index 819f97f07c7..97ca5efe4fc 100644
--- a/dbms/src/Functions/FunctionsJSON.cpp
+++ b/dbms/src/Functions/FunctionsJSON.cpp
@@ -1,6 +1,7 @@
 #include <Functions/FunctionsJSON.h>
 #include <Functions/DummyJSONParser.h>
 #include <Functions/SimdJSONParser.h>
+#include <Functions/RapidJSONParser.h>
 #include <Common/CpuId.h>
 
 
@@ -16,7 +17,12 @@ void registerFunctionsJSON(FunctionFactory & factory)
         return;
     }
 #endif
+
+#if USE_RAPIDJSON
+    registerFunctionsJSONTemplate<RapidJSONParser>(factory);
+#else
     registerFunctionsJSONTemplate<DummyJSONParser>(factory);
+#endif
 }
 
 }
diff --git a/dbms/src/Functions/RapidJSONParser.h b/dbms/src/Functions/RapidJSONParser.h
new file mode 100644
index 00000000000..7f5ac4ddf4a
--- /dev/null
+++ b/dbms/src/Functions/RapidJSONParser.h
@@ -0,0 +1,219 @@
+#pragma once
+
+#include <Common/config.h>
+#if USE_RAPIDJSON
+
+#include <common/StringRef.h>
+#include <Common/Exception.h>
+#include <Core/Types.h>
+
+#include <rapidjson/document.h>
+
+
+namespace DB
+{
+
+/// This class can be used as an argument for the template class FunctionJSON.
+/// It provides ability to parse JSONs using rapidjson library.
+struct RapidJSONParser
+{
+    /// No buffer has to be reserved before parsing, so preallocate() does nothing.
+    static constexpr bool need_preallocate = false;
+    void preallocate(size_t) {}
+
+    /// Parses `size` bytes starting at `data` into `document`.
+    /// Returns false if rapidjson reports a parse error.
+    bool parse(const char * data, size_t size)
+    {
+        InputStream in(data, size);
+        document.ParseStream(in);
+        return !document.HasParseError();
+    }
+
+    /// Cursor over the parsed document: points at one value and, after a downTo*() call,
+    /// also tracks the next sibling and the end of the enclosing array or object.
+    struct Iterator
+    {
+    public:
+        Iterator() {}
+        Iterator(const rapidjson::Document & document) : value(&document) {}
+
+        /// Defaulted copies transfer the anonymous unions as a whole, so no inactive
+        /// union member is read while copying.
+        Iterator(const Iterator & src) = default;
+        Iterator & operator =(const Iterator & src) = default;
+
+        const rapidjson::Value & getValue() const { return *value; }
+
+        /// Moves to the first element of the current array; false if the array is empty.
+        bool downToArray()
+        {
+            if (value->Empty())
+                return false;
+            current_in_array = &*value->Begin();
+            end_of_array = &*value->End();
+            value = current_in_array;
+            ++current_in_array;
+            parent_scope_is_object = false;
+            return true;
+        }
+
+        /// Moves to the next element of the enclosing array; false after the last one.
+        bool next()
+        {
+            if (current_in_array == end_of_array)
+                return false;
+            value = current_in_array;
+            ++current_in_array;
+            return true;
+        }
+
+        /// Moves to the value of the first member of the current object;
+        /// false if the object has no members.
+        bool downToObject()
+        {
+            if (value->ObjectEmpty())
+                return false;
+            current_in_object = &*value->MemberBegin();
+            end_of_object = &*value->MemberEnd();
+            value = &current_in_object->value;
+            ++current_in_object;
+            parent_scope_is_object = true;
+            return true;
+        }
+
+        /// Same as downToObject(), and also stores the first member's name in first_key.
+        bool downToObject(StringRef & first_key)
+        {
+            if (value->ObjectEmpty())
+                return false;
+            current_in_object = &*value->MemberBegin();
+            end_of_object = &*value->MemberEnd();
+            const auto & name = current_in_object->name;
+            first_key.data = name.GetString();
+            first_key.size = name.GetStringLength();
+            value = &current_in_object->value;
+            ++current_in_object;
+            parent_scope_is_object = true;
+            return true;
+        }
+
+        /// Moves to the value of the next member of the enclosing object;
+        /// false after the last one.
+        bool nextKeyValue()
+        {
+            if (current_in_object == end_of_object)
+                return false;
+            value = &current_in_object->value;
+            ++current_in_object;
+            return true;
+        }
+
+        /// Same as nextKeyValue(), and also stores the member's name in key.
+        bool nextKeyValue(StringRef & key)
+        {
+            if (current_in_object == end_of_object)
+                return false;
+            const auto & name = current_in_object->name;
+            key.data = name.GetString();
+            key.size = name.GetStringLength();
+            value = &current_in_object->value;
+            ++current_in_object;
+            return true;
+        }
+
+        /// Name of the member whose value is current. current_in_object is already one
+        /// past it, hence the -1. Only valid after downToObject() or nextKeyValue() succeeded.
+        StringRef getKey() const
+        {
+            const auto & name = (current_in_object - 1)->name;
+            return {name.GetString(), name.GetStringLength()};
+        }
+
+        bool parentScopeIsObject() const { return parent_scope_is_object; }
+
+        bool isInteger() const { return value->IsInt64(); }
+        bool isFloat() const { return value->IsDouble(); }
+        bool isBool() const { return value->IsBool(); }
+        bool isString() const { return value->IsString(); }
+        bool isArray() const { return value->IsArray(); }
+        bool isObject() const { return value->IsObject(); }
+        bool isNull() const { return value->IsNull(); }
+
+        Int64 getInteger() const { return value->GetInt64(); }
+        double getFloat() const { return value->GetDouble(); }
+        bool getBool() const { return value->GetBool(); }
+        StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
+
+    private:
+        const rapidjson::Value * value = nullptr;
+        bool parent_scope_is_object = false;
+
+        /// Next sibling to visit and the end of the enclosing scope. Which union member
+        /// is meaningful depends on the last downTo*() call. Both start as null so that
+        /// next()/nextKeyValue() before any downTo*() compare equal pointers and return false.
+        union
+        {
+            const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object = nullptr;
+            const rapidjson::Value * current_in_array;
+        };
+        union
+        {
+            const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object = nullptr;
+            const rapidjson::Value * end_of_array;
+        };
+    };
+
+    Iterator getRoot() { return Iterator{document}; }
+
+    /// Static forwarders to the Iterator methods above.
+    static bool downToArray(Iterator & it) { return it.downToArray(); }
+    static bool downToObject(Iterator & it) { return it.downToObject(); }
+    static bool downToObject(Iterator & it, StringRef & first_key) { return it.downToObject(first_key); }
+    static bool parentScopeIsObject(const Iterator & it) { return it.parentScopeIsObject(); }
+    static bool next(Iterator & it) { return it.next(); }
+    static bool nextKeyValue(Iterator & it) { return it.nextKeyValue(); }
+    static bool nextKeyValue(Iterator & it, StringRef & key) { return it.nextKeyValue(key); }
+    static bool isInteger(const Iterator & it) { return it.isInteger(); }
+    static bool isFloat(const Iterator & it) { return it.isFloat(); }
+    static bool isString(const Iterator & it) { return it.isString(); }
+    static bool isArray(const Iterator & it) { return it.isArray(); }
+    static bool isObject(const Iterator & it) { return it.isObject(); }
+    static bool isBool(const Iterator & it) { return it.isBool(); }
+    static bool isNull(const Iterator & it) { return it.isNull(); }
+    static StringRef getKey(const Iterator & it) { return it.getKey(); }
+    static StringRef getString(const Iterator & it) { return it.getString(); }
+    static Int64 getInteger(const Iterator & it) { return it.getInteger(); }
+    static double getFloat(const Iterator & it) { return it.getFloat(); }
+    static bool getBool(const Iterator & it) { return it.getBool(); }
+
+private:
+    /// Read-only rapidjson input stream over [data, data + size). At the end of the
+    /// range Peek() and Take() return 0, so the input does not have to be null-terminated.
+    class InputStream
+    {
+    public:
+        InputStream(const char * data, size_t size) : begin(data), end(data + size), current(data) {}
+
+        using Ch = char;
+        Ch Peek() { if (current == end) return 0; return *current; }
+        Ch Take() { if (current == end) return 0; return *current++; }
+        size_t Tell() const { return current - begin; }
+
+        /// Output half of the stream interface; never used for reading, so these are stubs.
+        Ch* PutBegin() { return nullptr; }
+        void Put(Ch) {}
+        void Flush() {}
+        size_t PutEnd(Ch*) { return 0; }
+
+    private:
+        const char * begin;
+        const char * end;
+        const char * current;
+    };
+
+    rapidjson::Document document;
+};
+
+}
+#endif
diff --git a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference b/dbms/tests/queries/0_stateless/00918_json_functions.reference
similarity index 100%
rename from dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference
rename to dbms/tests/queries/0_stateless/00918_json_functions.reference
diff --git a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql b/dbms/tests/queries/0_stateless/00918_json_functions.sql
similarity index 100%
rename from dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql
rename to dbms/tests/queries/0_stateless/00918_json_functions.sql