Use rapidjson when AVX2 instructions are not available.

This commit is contained in:
Vitaly Baranov 2019-05-16 20:40:24 +03:00
parent ae4f472bc7
commit 8ab4e4dcfe
10 changed files with 231 additions and 0 deletions

3
.gitmodules vendored
View File

@ -82,3 +82,6 @@
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/lemire/simdjson.git
[submodule "contrib/rapidjson"]
path = contrib/rapidjson
url = https://github.com/Tencent/rapidjson

View File

@ -328,6 +328,7 @@ include (cmake/find_base64.cmake)
include (cmake/find_hyperscan.cmake)
include (cmake/find_lfalloc.cmake)
include (cmake/find_simdjson.cmake)
include (cmake/find_rapidjson.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)

View File

@ -0,0 +1,9 @@
# Detect the rapidjson sources bundled as a git submodule under contrib/.
# When the main header is present, this file declares the USE_RAPIDJSON option (ON by default)
# and sets RAPIDJSON_INCLUDE_DIR; when it is absent, it only prints a warning and sets neither.
if (EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
    option (USE_RAPIDJSON "Use rapidjson" ON)
    set (RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
    message(STATUS "Using rapidjson=${USE_RAPIDJSON}: ${RAPIDJSON_INCLUDE_DIR}")
else ()
    # Submodule is not checked out: tell the user how to fetch it and leave everything undefined.
    message (WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive")
endif ()

1
contrib/rapidjson vendored Submodule

@ -0,0 +1 @@
Subproject commit 01950eb7acec78818d68b762efc869bba2420d82

View File

@ -26,6 +26,7 @@
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_LFALLOC
#cmakedefine01 USE_LFALLOC_RANDOM_HINT

View File

@ -73,3 +73,7 @@ endif()
if(USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
endif()
if(USE_RAPIDJSON)
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
endif()

View File

@ -1,6 +1,7 @@
#include <Functions/FunctionsJSON.h>
#include <Functions/DummyJSONParser.h>
#include <Functions/SimdJSONParser.h>
#include <Functions/RapidJSONParser.h>
#include <Common/CpuId.h>
@ -16,7 +17,12 @@ void registerFunctionsJSON(FunctionFactory & factory)
return;
}
#endif
#if USE_RAPIDJSON
registerFunctionsJSONTemplate<RapidJSONParser>(factory);
#else
registerFunctionsJSONTemplate<DummyJSONParser>(factory);
#endif
}
}

View File

@ -0,0 +1,206 @@
#pragma once
#include <Common/config.h>
#if USE_RAPIDJSON
#include <common/StringRef.h>
#include <Common/Exception.h>
#include <Core/Types.h>
#include <rapidjson/document.h>
namespace DB
{
/// This class can be used as an argument for the template class FunctionJSON.
/// It provides ability to parse JSONs using rapidjson library.
struct RapidJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
bool parse(const char * data, size_t size)
{
InputStream in(data, size);
document.ParseStream(in);
return !document.HasParseError();
}
struct Iterator
{
public:
Iterator() {}
Iterator(const rapidjson::Document & document) : value(&document) {}
Iterator(const Iterator & src)
: value(src.value)
, parent_scope_is_object(src.parent_scope_is_object)
, current_in_array(src.current_in_array)
, end_of_array(src.end_of_array) {}
Iterator & operator =(const Iterator & src)
{
value = src.value;
parent_scope_is_object = src.parent_scope_is_object;
current_in_array = src.current_in_array;
end_of_array = src.end_of_array;
return *this;
}
const rapidjson::Value & getValue() const { return *value; }
bool downToArray()
{
if (value->Empty())
return false;
current_in_array = &*value->Begin();
end_of_array = &*value->End();
value = current_in_array;
++current_in_array;
parent_scope_is_object = false;
return true;
}
bool next()
{
if (current_in_array == end_of_array)
return false;
value = current_in_array;
++current_in_array;
return true;
}
bool downToObject()
{
if (value->ObjectEmpty())
return false;
current_in_object = &*value->MemberBegin();
end_of_object = &*value->MemberEnd();
value = &current_in_object->value;
++current_in_object;
parent_scope_is_object = true;
return true;
}
bool downToObject(StringRef & first_key)
{
if (value->ObjectEmpty())
return false;
current_in_object = &*value->MemberBegin();
end_of_object = &*value->MemberEnd();
const auto & name = current_in_object->name;
first_key.data = name.GetString();
first_key.size = name.GetStringLength();
value = &current_in_object->value;
++current_in_object;
parent_scope_is_object = true;
return true;
}
bool nextKeyValue()
{
if (current_in_object == end_of_object)
return false;
value = &current_in_object->value;
++current_in_object;
return true;
}
bool nextKeyValue(StringRef & key)
{
if (current_in_object == end_of_object)
return false;
const auto & name = current_in_object->name;
key.data = name.GetString();
key.size = name.GetStringLength();
value = &current_in_object->value;
++current_in_object;
return true;
}
StringRef getKey() const
{
const auto & name = (current_in_object - 1)->name;
return {name.GetString(), name.GetStringLength()};
}
bool parentScopeIsObject() const { return parent_scope_is_object; }
bool isInteger() const { return value->IsInt64(); }
bool isFloat() const { return value->IsDouble(); }
bool isBool() const { return value->IsBool(); }
bool isString() const { return value->IsString(); }
bool isArray() const { return value->IsArray(); }
bool isObject() const { return value->IsObject(); }
bool isNull() const { return value->IsNull(); }
Int64 getInteger() const { return value->GetInt64(); }
double getFloat() const { return value->GetDouble(); }
bool getBool() const { return value->GetBool(); }
StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
private:
const rapidjson::Value * value = nullptr;
bool parent_scope_is_object = false;
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object;
const rapidjson::Value * current_in_array;
};
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object;
const rapidjson::Value * end_of_array;
};
};
Iterator getRoot() { return Iterator{document}; }
static bool downToArray(Iterator & it) { return it.downToArray(); }
static bool downToObject(Iterator & it) { return it.downToObject(); }
static bool downToObject(Iterator & it, StringRef & first_key) { return it.downToObject(first_key); }
static bool parentScopeIsObject(const Iterator & it) { return it.parentScopeIsObject(); }
static bool next(Iterator & it) { return it.next(); }
static bool nextKeyValue(Iterator & it) { return it.nextKeyValue(); }
static bool nextKeyValue(Iterator & it, StringRef & key) { return it.nextKeyValue(key); }
static bool isInteger(const Iterator & it) { return it.isInteger(); }
static bool isFloat(const Iterator & it) { return it.isFloat(); }
static bool isString(const Iterator & it) { return it.isString(); }
static bool isArray(const Iterator & it) { return it.isArray(); }
static bool isObject(const Iterator & it) { return it.isObject(); }
static bool isBool(const Iterator & it) { return it.isBool(); }
static bool isNull(const Iterator & it) { return it.isNull(); }
static StringRef getKey(const Iterator & it) { return it.getKey(); }
static StringRef getString(const Iterator & it) { return it.getString(); }
static Int64 getInteger(const Iterator & it) { return it.getInteger(); }
static double getFloat(const Iterator & it) { return it.getFloat(); }
static bool getBool(const Iterator & it) { return it.getBool(); }
private:
class InputStream
{
public:
InputStream(const char * data, size_t size) : begin(data), end(data + size), current(data) {}
using Ch = char;
Ch Peek() { if (current == end) return 0; return *current; }
Ch Take() { if (current == end) return 0; return *current++; }
size_t Tell() const { return current - begin; }
Ch* PutBegin() { return nullptr; }
void Put(Ch) {}
void Flush() {}
size_t PutEnd(Ch*) { return 0; }
private:
const char * begin;
const char * end;
const char * current;
};
rapidjson::Document document;
};
}
#endif