mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge pull request #5235 from vitlibar/improve-new-json-functions
Improve new json functions
This commit is contained in:
commit
24b16da9fd
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -82,3 +82,6 @@
|
||||
[submodule "contrib/simdjson"]
|
||||
path = contrib/simdjson
|
||||
url = https://github.com/lemire/simdjson.git
|
||||
[submodule "contrib/rapidjson"]
|
||||
path = contrib/rapidjson
|
||||
url = https://github.com/Tencent/rapidjson
|
||||
|
@ -328,6 +328,7 @@ include (cmake/find_base64.cmake)
|
||||
include (cmake/find_hyperscan.cmake)
|
||||
include (cmake/find_lfalloc.cmake)
|
||||
include (cmake/find_simdjson.cmake)
|
||||
include (cmake/find_rapidjson.cmake)
|
||||
find_contrib_lib(cityhash)
|
||||
find_contrib_lib(farmhash)
|
||||
find_contrib_lib(metrohash)
|
||||
|
9
cmake/find_rapidjson.cmake
Normal file
9
cmake/find_rapidjson.cmake
Normal file
@ -0,0 +1,9 @@
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
|
||||
message (WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
return()
|
||||
endif ()
|
||||
|
||||
option (USE_RAPIDJSON "Use rapidjson" ON)
|
||||
set (RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
|
||||
|
||||
message(STATUS "Using rapidjson=${USE_RAPIDJSON}: ${RAPIDJSON_INCLUDE_DIR}")
|
1
contrib/rapidjson
vendored
Submodule
1
contrib/rapidjson
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 01950eb7acec78818d68b762efc869bba2420d82
|
2
contrib/simdjson
vendored
2
contrib/simdjson
vendored
@ -1 +1 @@
|
||||
Subproject commit 681cd3369860f4eada49a387cbff93030f759c95
|
||||
Subproject commit 14cd1f7a0b0563db78bda8053a9f6ac2ea95a441
|
@ -26,6 +26,7 @@
|
||||
#cmakedefine01 USE_SSL
|
||||
#cmakedefine01 USE_HYPERSCAN
|
||||
#cmakedefine01 USE_SIMDJSON
|
||||
#cmakedefine01 USE_RAPIDJSON
|
||||
#cmakedefine01 USE_LFALLOC
|
||||
#cmakedefine01 USE_LFALLOC_RANDOM_HINT
|
||||
|
||||
|
@ -430,6 +430,13 @@ inline bool_if_safe_conversion<A, B> greaterOrEqualsOp(A a, B b)
|
||||
template <typename From, typename To>
|
||||
inline bool NO_SANITIZE_UNDEFINED convertNumeric(From value, To & result)
|
||||
{
|
||||
/// If the type is actually the same it's not necessary to do any checks.
|
||||
if constexpr (std::is_same_v<From, To>)
|
||||
{
|
||||
result = value;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Note that NaNs doesn't compare equal to anything, but they are still in range of any Float type.
|
||||
if (isNaN(value) && std::is_floating_point_v<To>)
|
||||
{
|
||||
|
@ -323,8 +323,9 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
|
||||
\
|
||||
M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
|
||||
M(SettingBool, allow_simdjson, 1, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
|
||||
\
|
||||
M(SettingUInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.") \
|
||||
M(SettingUInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.")
|
||||
|
||||
DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
|
||||
|
||||
|
@ -73,3 +73,7 @@ endif()
|
||||
if(USE_SIMDJSON)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(USE_RAPIDJSON)
|
||||
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
|
||||
endif()
|
||||
|
57
dbms/src/Functions/DummyJSONParser.h
Normal file
57
dbms/src/Functions/DummyJSONParser.h
Normal file
@ -0,0 +1,57 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/StringRef.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// This class can be used as an argument for the template class FunctionJSON when we unable to parse JSONs.
|
||||
/// It can't do anything useful and just throws an exception.
|
||||
struct DummyJSONParser
|
||||
{
|
||||
static constexpr bool need_preallocate = false;
|
||||
void preallocate(size_t) {}
|
||||
|
||||
bool parse(const StringRef &) { throw Exception{"Functions JSON* are not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED}; }
|
||||
|
||||
using Iterator = std::nullptr_t;
|
||||
Iterator getRoot() const { return nullptr; }
|
||||
|
||||
static bool isInt64(const Iterator &) { return false; }
|
||||
static bool isUInt64(const Iterator &) { return false; }
|
||||
static bool isDouble(const Iterator &) { return false; }
|
||||
static bool isString(const Iterator &) { return false; }
|
||||
static bool isArray(const Iterator &) { return false; }
|
||||
static bool isObject(const Iterator &) { return false; }
|
||||
static bool isBool(const Iterator &) { return false; }
|
||||
static bool isNull(const Iterator &) { return true; }
|
||||
|
||||
static Int64 getInt64(const Iterator &) { return 0; }
|
||||
static UInt64 getUInt64(const Iterator &) { return 0; }
|
||||
static double getDouble(const Iterator &) { return 0; }
|
||||
static bool getBool(const Iterator &) { return false; }
|
||||
static StringRef getString(const Iterator &) { return {}; }
|
||||
|
||||
static size_t sizeOfArray(const Iterator &) { return 0; }
|
||||
static bool firstArrayElement(Iterator &) { return false; }
|
||||
static bool arrayElementByIndex(Iterator &, size_t) { return false; }
|
||||
static bool nextArrayElement(Iterator &) { return false; }
|
||||
|
||||
static size_t sizeOfObject(const Iterator &) { return 0; }
|
||||
static bool firstObjectMember(Iterator &) { return false; }
|
||||
static bool firstObjectMember(Iterator &, StringRef &) { return false; }
|
||||
static bool objectMemberByIndex(Iterator &, size_t) { return false; }
|
||||
static bool objectMemberByName(Iterator &, const StringRef &) { return false; }
|
||||
static bool nextObjectMember(Iterator &) { return false; }
|
||||
static bool nextObjectMember(Iterator &, StringRef &) { return false; }
|
||||
static bool isObjectMember(const Iterator &) { return false; }
|
||||
static StringRef getKey(const Iterator &) { return {}; }
|
||||
};
|
||||
|
||||
}
|
@ -1,378 +1,24 @@
|
||||
#include <Functions/FunctionsJSON.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_SIMDJSON
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T>
|
||||
class JSONNullableImplBase
|
||||
{
|
||||
public:
|
||||
static DataTypePtr getType() { return std::make_shared<DataTypeNullable>(std::make_shared<T>()); }
|
||||
|
||||
static Field getDefault() { return {}; }
|
||||
};
|
||||
|
||||
class JSONHasImpl : public JSONNullableImplBase<DataTypeUInt8>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonHas"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator &) { return {1}; }
|
||||
};
|
||||
|
||||
class JSONLengthImpl : public JSONNullableImplBase<DataTypeUInt64>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonLength"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (!pjh.is_object_or_array())
|
||||
return getDefault();
|
||||
|
||||
size_t size = 0;
|
||||
|
||||
if (pjh.down())
|
||||
{
|
||||
size += 1;
|
||||
|
||||
while (pjh.next())
|
||||
size += 1;
|
||||
}
|
||||
|
||||
return {size};
|
||||
}
|
||||
};
|
||||
|
||||
class JSONTypeImpl : public JSONNullableImplBase<DataTypeString>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonType"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
switch (pjh.get_type())
|
||||
{
|
||||
case '[':
|
||||
return "Array";
|
||||
case '{':
|
||||
return "Object";
|
||||
case '"':
|
||||
return "String";
|
||||
case 'l':
|
||||
return "Int64";
|
||||
case 'd':
|
||||
return "Float64";
|
||||
case 't':
|
||||
return "Bool";
|
||||
case 'f':
|
||||
return "Bool";
|
||||
case 'n':
|
||||
return "Null";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class JSONExtractImpl
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtract"};
|
||||
|
||||
static DataTypePtr getType(const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|
||||
|| which.isStringOrFixedString() || which.isInterval())
|
||||
return std::make_shared<DataTypeNullable>(type);
|
||||
|
||||
if (which.isArray())
|
||||
{
|
||||
auto array_type = static_cast<const DataTypeArray *>(type.get());
|
||||
|
||||
return std::make_shared<DataTypeArray>(getType(array_type->getNestedType()));
|
||||
}
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
DataTypes types;
|
||||
types.reserve(tuple_type->getElements().size());
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
{
|
||||
types.push_back(getType(element));
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeTuple>(std::move(types));
|
||||
}
|
||||
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
static Field getDefault(const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|
||||
|| which.isStringOrFixedString() || which.isInterval())
|
||||
return {};
|
||||
|
||||
if (which.isArray())
|
||||
return {Array{}};
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
Tuple tuple;
|
||||
tuple.toUnderType().reserve(tuple_type->getElements().size());
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
tuple.toUnderType().push_back(getDefault(element));
|
||||
|
||||
return {tuple};
|
||||
}
|
||||
|
||||
// should not reach
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh, const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isEnum() || which.isDateOrDateTime() || which.isInterval())
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {pjh.get_integer()};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isFloat())
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {static_cast<double>(pjh.get_integer())};
|
||||
else if (pjh.is_double())
|
||||
return {pjh.get_double()};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isStringOrFixedString())
|
||||
{
|
||||
if (pjh.is_string())
|
||||
return {String{pjh.get_string()}};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isArray())
|
||||
{
|
||||
if (!pjh.is_object_or_array())
|
||||
return getDefault(type);
|
||||
|
||||
auto array_type = static_cast<const DataTypeArray *>(type.get());
|
||||
|
||||
Array array;
|
||||
|
||||
bool first = true;
|
||||
|
||||
while (first ? pjh.down() : pjh.next())
|
||||
{
|
||||
first = false;
|
||||
|
||||
ParsedJson::iterator pjh1{pjh};
|
||||
|
||||
array.push_back(getValue(pjh1, array_type->getNestedType()));
|
||||
}
|
||||
|
||||
return {array};
|
||||
}
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
if (!pjh.is_object_or_array())
|
||||
return getDefault(type);
|
||||
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
Tuple tuple;
|
||||
tuple.toUnderType().reserve(tuple_type->getElements().size());
|
||||
|
||||
bool valid = true;
|
||||
bool first = true;
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
{
|
||||
if (valid)
|
||||
{
|
||||
valid &= first ? pjh.down() : pjh.next();
|
||||
first = false;
|
||||
|
||||
ParsedJson::iterator pjh1{pjh};
|
||||
|
||||
tuple.toUnderType().push_back(getValue(pjh1, element));
|
||||
}
|
||||
else
|
||||
tuple.toUnderType().push_back(getDefault(element));
|
||||
}
|
||||
|
||||
return {tuple};
|
||||
}
|
||||
|
||||
// should not reach
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
};
|
||||
|
||||
class JSONExtractUIntImpl : public JSONNullableImplBase<DataTypeUInt64>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtractUInt"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {pjh.get_integer()};
|
||||
else
|
||||
return getDefault();
|
||||
}
|
||||
};
|
||||
|
||||
class JSONExtractIntImpl : public JSONNullableImplBase<DataTypeInt64>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtractInt"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {pjh.get_integer()};
|
||||
else
|
||||
return getDefault();
|
||||
}
|
||||
};
|
||||
|
||||
class JSONExtractFloatImpl : public JSONNullableImplBase<DataTypeFloat64>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtractFloat"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (pjh.is_double())
|
||||
return {pjh.get_double()};
|
||||
else
|
||||
return getDefault();
|
||||
}
|
||||
};
|
||||
|
||||
class JSONExtractBoolImpl : public JSONNullableImplBase<DataTypeUInt8>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtractBool"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (pjh.get_type() == 't')
|
||||
return {1};
|
||||
else if (pjh.get_type() == 'f')
|
||||
return {0};
|
||||
else
|
||||
return getDefault();
|
||||
}
|
||||
};
|
||||
|
||||
// class JSONExtractRawImpl: public JSONNullableImplBase<DataTypeString>
|
||||
// {
|
||||
// public:
|
||||
// static constexpr auto name {"jsonExtractRaw"};
|
||||
|
||||
// static Field getValue(ParsedJson::iterator & pjh)
|
||||
// {
|
||||
// //
|
||||
// }
|
||||
// };
|
||||
|
||||
class JSONExtractStringImpl : public JSONNullableImplBase<DataTypeString>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtractString"};
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh)
|
||||
{
|
||||
if (pjh.is_string())
|
||||
return {String{pjh.get_string()}};
|
||||
else
|
||||
return getDefault();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#else
|
||||
namespace DB
|
||||
{
|
||||
struct JSONHasImpl { static constexpr auto name{"jsonHas"}; };
|
||||
struct JSONLengthImpl { static constexpr auto name{"jsonLength"}; };
|
||||
struct JSONTypeImpl { static constexpr auto name{"jsonType"}; };
|
||||
struct JSONExtractImpl { static constexpr auto name{"jsonExtract"}; };
|
||||
struct JSONExtractUIntImpl { static constexpr auto name{"jsonExtractUInt"}; };
|
||||
struct JSONExtractIntImpl { static constexpr auto name{"jsonExtractInt"}; };
|
||||
struct JSONExtractFloatImpl { static constexpr auto name{"jsonExtractFloat"}; };
|
||||
struct JSONExtractBoolImpl { static constexpr auto name{"jsonExtractBool"}; };
|
||||
//struct JSONExtractRawImpl { static constexpr auto name {"jsonExtractRaw"}; };
|
||||
struct JSONExtractStringImpl { static constexpr auto name{"jsonExtractString"}; };
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerFunctionsJSON(FunctionFactory & factory)
|
||||
{
|
||||
#if USE_SIMDJSON
|
||||
if (__builtin_cpu_supports("avx2"))
|
||||
{
|
||||
factory.registerFunction<FunctionJSONBase<JSONHasImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONLengthImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONTypeImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractImpl, true>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractUIntImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractIntImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractFloatImpl, false>>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractBoolImpl, false>>();
|
||||
// factory.registerFunction<FunctionJSONBase<
|
||||
// JSONExtractRawImpl,
|
||||
// false
|
||||
// >>();
|
||||
factory.registerFunction<FunctionJSONBase<JSONExtractStringImpl, false>>();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
factory.registerFunction<FunctionJSONDummy<JSONHasImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONLengthImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONTypeImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractUIntImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractIntImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractFloatImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractBoolImpl>>();
|
||||
//factory.registerFunction<FunctionJSONDummy<JSONExtractRawImpl>>();
|
||||
factory.registerFunction<FunctionJSONDummy<JSONExtractStringImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONHas, JSONHasImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONLength, JSONLengthImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONKey, JSONKeyImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONType, JSONTypeImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractInt, JSONExtractInt64Impl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractUInt, JSONExtractUInt64Impl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractFloat, JSONExtractFloat64Impl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractBool, JSONExtractBoolImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractString, JSONExtractStringImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtract, JSONExtractImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractKeysAndValues, JSONExtractKeysAndValuesImpl>>();
|
||||
factory.registerFunction<FunctionJSON<NameJSONExtractRaw, JSONExtractRawImpl>>();
|
||||
}
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
211
dbms/src/Functions/RapidJSONParser.h
Normal file
211
dbms/src/Functions/RapidJSONParser.h
Normal file
@ -0,0 +1,211 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_RAPIDJSON
|
||||
|
||||
#include <common/StringRef.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
#include <rapidjson/document.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// This class can be used as an argument for the template class FunctionJSON.
|
||||
/// It provides ability to parse JSONs using rapidjson library.
|
||||
struct RapidJSONParser
|
||||
{
|
||||
static constexpr bool need_preallocate = false;
|
||||
void preallocate(size_t) {}
|
||||
|
||||
bool parse(const StringRef & json)
|
||||
{
|
||||
document.Parse(json.data);
|
||||
return !document.HasParseError();
|
||||
}
|
||||
|
||||
struct Iterator
|
||||
{
|
||||
public:
|
||||
Iterator() {}
|
||||
Iterator(const rapidjson::Document & document) : value(&document) {}
|
||||
Iterator(const Iterator & src)
|
||||
: value(src.value)
|
||||
, is_object_member(src.is_object_member)
|
||||
, current_in_array(src.current_in_array)
|
||||
, end_of_array(src.end_of_array) {}
|
||||
|
||||
Iterator & operator =(const Iterator & src)
|
||||
{
|
||||
value = src.value;
|
||||
is_object_member = src.is_object_member;
|
||||
current_in_array = src.current_in_array;
|
||||
end_of_array = src.end_of_array;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool isInt64() const { return value->IsInt64(); }
|
||||
bool isUInt64() const { return value->IsUint64(); }
|
||||
bool isDouble() const { return value->IsDouble(); }
|
||||
bool isBool() const { return value->IsBool(); }
|
||||
bool isString() const { return value->IsString(); }
|
||||
bool isArray() const { return value->IsArray(); }
|
||||
bool isObject() const { return value->IsObject(); }
|
||||
bool isNull() const { return value->IsNull(); }
|
||||
|
||||
Int64 getInt64() const { return value->GetInt64(); }
|
||||
UInt64 getUInt64() const { return value->GetUint64(); }
|
||||
double getDouble() const { return value->GetDouble(); }
|
||||
bool getBool() const { return value->GetBool(); }
|
||||
StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
|
||||
|
||||
size_t sizeOfArray() const { return value->Size(); }
|
||||
|
||||
bool arrayElementByIndex(size_t index)
|
||||
{
|
||||
if (index >= value->Size())
|
||||
return false;
|
||||
setRange(value->Begin() + index, value->End());
|
||||
value = current_in_array++;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool nextArrayElement()
|
||||
{
|
||||
if (current_in_array == end_of_array)
|
||||
return false;
|
||||
value = current_in_array++;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t sizeOfObject() const { return value->MemberCount(); }
|
||||
|
||||
bool objectMemberByIndex(size_t index)
|
||||
{
|
||||
if (index >= value->MemberCount())
|
||||
return false;
|
||||
setRange(value->MemberBegin() + index, value->MemberEnd());
|
||||
value = &(current_in_object++)->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool objectMemberByIndex(size_t index, StringRef & key)
|
||||
{
|
||||
if (index >= value->MemberCount())
|
||||
return false;
|
||||
setRange(value->MemberBegin() + index, value->MemberEnd());
|
||||
key = getKeyImpl(current_in_object);
|
||||
value = &(current_in_object++)->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool objectMemberByName(const StringRef & name)
|
||||
{
|
||||
auto it = value->FindMember(name.data);
|
||||
if (it == value->MemberEnd())
|
||||
return false;
|
||||
setRange(it, value->MemberEnd());
|
||||
value = &(current_in_object++)->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool nextObjectMember()
|
||||
{
|
||||
if (current_in_object == end_of_object)
|
||||
return false;
|
||||
value = &(current_in_object++)->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool nextObjectMember(StringRef & key)
|
||||
{
|
||||
if (current_in_object == end_of_object)
|
||||
return false;
|
||||
key = getKeyImpl(current_in_object);
|
||||
value = &(current_in_object++)->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isObjectMember() const { return is_object_member; }
|
||||
|
||||
StringRef getKey() const
|
||||
{
|
||||
return getKeyImpl(current_in_object - 1);
|
||||
}
|
||||
|
||||
private:
|
||||
void setRange(rapidjson::Value::ConstValueIterator current, rapidjson::Value::ConstValueIterator end)
|
||||
{
|
||||
current_in_array = &*current;
|
||||
end_of_array = &*end;
|
||||
is_object_member = false;
|
||||
}
|
||||
|
||||
void setRange(rapidjson::Value::ConstMemberIterator current, rapidjson::Value::ConstMemberIterator end)
|
||||
{
|
||||
current_in_object = &*current;
|
||||
end_of_object = &*end;
|
||||
is_object_member = true;
|
||||
}
|
||||
|
||||
static StringRef getKeyImpl(const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * member)
|
||||
{
|
||||
const auto & name = member->name;
|
||||
return {name.GetString(), name.GetStringLength()};
|
||||
}
|
||||
|
||||
const rapidjson::Value * value = nullptr;
|
||||
bool is_object_member = false;
|
||||
|
||||
union
|
||||
{
|
||||
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object;
|
||||
const rapidjson::Value * current_in_array;
|
||||
};
|
||||
union
|
||||
{
|
||||
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object;
|
||||
const rapidjson::Value * end_of_array;
|
||||
};
|
||||
};
|
||||
|
||||
Iterator getRoot() { return Iterator{document}; }
|
||||
|
||||
static bool isInt64(const Iterator & it) { return it.isInt64(); }
|
||||
static bool isUInt64(const Iterator & it) { return it.isUInt64(); }
|
||||
static bool isDouble(const Iterator & it) { return it.isDouble(); }
|
||||
static bool isBool(const Iterator & it) { return it.isBool(); }
|
||||
static bool isString(const Iterator & it) { return it.isString(); }
|
||||
static bool isArray(const Iterator & it) { return it.isArray(); }
|
||||
static bool isObject(const Iterator & it) { return it.isObject(); }
|
||||
static bool isNull(const Iterator & it) { return it.isNull(); }
|
||||
|
||||
static Int64 getInt64(const Iterator & it) { return it.getInt64(); }
|
||||
static UInt64 getUInt64(const Iterator & it) { return it.getUInt64(); }
|
||||
static double getDouble(const Iterator & it) { return it.getDouble(); }
|
||||
static bool getBool(const Iterator & it) { return it.getBool(); }
|
||||
static StringRef getString(const Iterator & it) { return it.getString(); }
|
||||
|
||||
static size_t sizeOfArray(const Iterator & it) { return it.sizeOfArray(); }
|
||||
static bool firstArrayElement(Iterator & it) { return it.arrayElementByIndex(0); }
|
||||
static bool arrayElementByIndex(Iterator & it, size_t index) { return it.arrayElementByIndex(index); }
|
||||
static bool nextArrayElement(Iterator & it) { return it.nextArrayElement(); }
|
||||
|
||||
static size_t sizeOfObject(const Iterator & it) { return it.sizeOfObject(); }
|
||||
static bool firstObjectMember(Iterator & it) { return it.objectMemberByIndex(0); }
|
||||
static bool firstObjectMember(Iterator & it, StringRef & first_key) { return it.objectMemberByIndex(0, first_key); }
|
||||
static bool objectMemberByIndex(Iterator & it, size_t index) { return it.objectMemberByIndex(index); }
|
||||
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.objectMemberByName(name); }
|
||||
static bool nextObjectMember(Iterator & it) { return it.nextObjectMember(); }
|
||||
static bool nextObjectMember(Iterator & it, StringRef & next_key) { return it.nextObjectMember(next_key); }
|
||||
static bool isObjectMember(const Iterator & it) { return it.isObjectMember(); }
|
||||
static StringRef getKey(const Iterator & it) { return it.getKey(); }
|
||||
|
||||
private:
|
||||
rapidjson::Document document;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
150
dbms/src/Functions/SimdJSONParser.h
Normal file
150
dbms/src/Functions/SimdJSONParser.h
Normal file
@ -0,0 +1,150 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_SIMDJSON
|
||||
|
||||
#include <common/StringRef.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
#pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
|
||||
#include <simdjson/jsonparser.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
}
|
||||
|
||||
/// This class can be used as an argument for the template class FunctionJSON.
|
||||
/// It provides ability to parse JSONs using simdjson library.
|
||||
struct SimdJSONParser
|
||||
{
|
||||
static constexpr bool need_preallocate = true;
|
||||
|
||||
void preallocate(size_t max_size)
|
||||
{
|
||||
if (!pj.allocateCapacity(max_size))
|
||||
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
|
||||
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
|
||||
}
|
||||
|
||||
bool parse(const StringRef & json) { return !json_parse(json.data, json.size, pj); }
|
||||
|
||||
using Iterator = ParsedJson::iterator;
|
||||
Iterator getRoot() { return Iterator{pj}; }
|
||||
|
||||
static bool isInt64(const Iterator & it) { return it.is_integer(); }
|
||||
static bool isUInt64(const Iterator &) { return false; /* See https://github.com/lemire/simdjson/issues/68 */ }
|
||||
static bool isDouble(const Iterator & it) { return it.is_double(); }
|
||||
static bool isString(const Iterator & it) { return it.is_string(); }
|
||||
static bool isArray(const Iterator & it) { return it.is_array(); }
|
||||
static bool isObject(const Iterator & it) { return it.is_object(); }
|
||||
static bool isBool(const Iterator & it) { return it.get_type() == 't' || it.get_type() == 'f'; }
|
||||
static bool isNull(const Iterator & it) { return it.is_null(); }
|
||||
|
||||
static Int64 getInt64(const Iterator & it) { return it.get_integer(); }
|
||||
static UInt64 getUInt64(const Iterator &) { return 0; /* isUInt64() never returns true */ }
|
||||
static double getDouble(const Iterator & it) { return it.get_double(); }
|
||||
static bool getBool(const Iterator & it) { return it.get_type() == 't'; }
|
||||
static StringRef getString(const Iterator & it) { return StringRef{it.get_string(), it.get_string_length()}; }
|
||||
|
||||
static size_t sizeOfArray(const Iterator & it)
|
||||
{
|
||||
size_t size = 0;
|
||||
Iterator it2 = it;
|
||||
if (it2.down())
|
||||
{
|
||||
do
|
||||
++size;
|
||||
while (it2.next());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static bool firstArrayElement(Iterator & it) { return it.down(); }
|
||||
|
||||
static bool arrayElementByIndex(Iterator & it, size_t index)
|
||||
{
|
||||
if (!it.down())
|
||||
return false;
|
||||
while (index--)
|
||||
if (!it.next())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool nextArrayElement(Iterator & it) { return it.next(); }
|
||||
|
||||
static size_t sizeOfObject(const Iterator & it)
|
||||
{
|
||||
size_t size = 0;
|
||||
Iterator it2 = it;
|
||||
if (it2.down())
|
||||
{
|
||||
do
|
||||
++size;
|
||||
while (it2.next() && it2.next());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static bool firstObjectMember(Iterator & it) { return it.down() && it.next(); }
|
||||
|
||||
static bool firstObjectMember(Iterator & it, StringRef & first_key)
|
||||
{
|
||||
if (!it.down())
|
||||
return false;
|
||||
first_key.data = it.get_string();
|
||||
first_key.size = it.get_string_length();
|
||||
return it.next();
|
||||
}
|
||||
|
||||
static bool objectMemberByIndex(Iterator & it, size_t index)
|
||||
{
|
||||
if (!it.down())
|
||||
return false;
|
||||
while (index--)
|
||||
if (!it.next() || !it.next())
|
||||
return false;
|
||||
return it.next();
|
||||
}
|
||||
|
||||
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.move_to_key(name.data); }
|
||||
static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); }
|
||||
|
||||
static bool nextObjectMember(Iterator & it, StringRef & next_key)
|
||||
{
|
||||
if (!it.next())
|
||||
return false;
|
||||
next_key.data = it.get_string();
|
||||
next_key.size = it.get_string_length();
|
||||
return it.next();
|
||||
}
|
||||
|
||||
static bool isObjectMember(const Iterator & it) { return it.get_scope_type() == '{'; }
|
||||
|
||||
static StringRef getKey(const Iterator & it)
|
||||
{
|
||||
Iterator it2 = it;
|
||||
it2.prev();
|
||||
return StringRef{it2.get_string(), it2.get_string_length()};
|
||||
}
|
||||
|
||||
private:
|
||||
ParsedJson pj;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -38,18 +38,29 @@ private:
|
||||
working_buffer = internal_buffer;
|
||||
}
|
||||
|
||||
static constexpr size_t initial_size = 32;
|
||||
|
||||
public:
|
||||
WriteBufferFromVector(VectorType & vector_)
|
||||
: WriteBuffer(reinterpret_cast<Position>(vector_.data()), vector_.size()), vector(vector_)
|
||||
{
|
||||
if (vector.empty())
|
||||
{
|
||||
static constexpr size_t initial_size = 32;
|
||||
vector.resize(initial_size);
|
||||
set(reinterpret_cast<Position>(vector.data()), vector.size());
|
||||
}
|
||||
}
|
||||
|
||||
/// Append to vector instead of rewrite.
|
||||
struct AppendModeTag {};
|
||||
WriteBufferFromVector(VectorType & vector_, AppendModeTag)
|
||||
: WriteBuffer(nullptr, 0), vector(vector_)
|
||||
{
|
||||
size_t old_size = vector.size();
|
||||
vector.resize(vector.capacity() < initial_size ? initial_size : vector.capacity());
|
||||
set(reinterpret_cast<Position>(vector.data() + old_size), (vector.size() - old_size) * sizeof(typename VectorType::value_type));
|
||||
}
|
||||
|
||||
void finish()
|
||||
{
|
||||
if (is_finished)
|
||||
|
@ -0,0 +1,66 @@
|
||||
--JSONLength--
|
||||
2
|
||||
3
|
||||
0
|
||||
--JSONHas--
|
||||
1
|
||||
1
|
||||
0
|
||||
--JSONKey--
|
||||
a
|
||||
b
|
||||
b
|
||||
a
|
||||
--JSONType--
|
||||
Object
|
||||
Array
|
||||
--JSONExtract<numeric>--
|
||||
-100
|
||||
200
|
||||
300
|
||||
1
|
||||
0
|
||||
--JSONExtractString--
|
||||
hello
|
||||
hello
|
||||
\n\0
|
||||
☺
|
||||
|
||||
|
||||
--JSONExtract (generic)--
|
||||
('hello',[-100,200,300])
|
||||
('hello',[-100,200,300])
|
||||
([-100,200,300],'hello')
|
||||
('hello\0',0)
|
||||
hello
|
||||
[-100,200,300]
|
||||
(-100,200,300)
|
||||
[-100,0,0]
|
||||
[-100,NULL,NULL]
|
||||
[0,200,0]
|
||||
[NULL,200,NULL]
|
||||
-100
|
||||
200
|
||||
\N
|
||||
1
|
||||
Thursday
|
||||
Friday
|
||||
--JSONExtractKeysAndValues--
|
||||
[('a','hello')]
|
||||
[('b',[-100,200,300])]
|
||||
[('a','hello'),('b','world')]
|
||||
[('a',5),('b',7),('c',11)]
|
||||
--JSONExtractRaw--
|
||||
{"a":"hello","b":[-100,200,300]}
|
||||
"hello"
|
||||
[-100,200,300]
|
||||
-100
|
||||
{"a":"hello","b":[-100,200,300],"c":{"d":[121,144]}}
|
||||
{"d":[121,144]}
|
||||
[121,144]
|
||||
144
|
||||
|
||||
{"passed":true}
|
||||
{}
|
||||
"\\n\\u0000"
|
||||
"☺"
|
76
dbms/tests/queries/0_stateless/00918_json_functions.sql
Normal file
76
dbms/tests/queries/0_stateless/00918_json_functions.sql
Normal file
@ -0,0 +1,76 @@
|
||||
SET allow_simdjson=1;
|
||||
|
||||
SELECT '--JSONLength--';
|
||||
SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
SELECT JSONLength('{}');
|
||||
|
||||
SELECT '--JSONHas--';
|
||||
SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'c');
|
||||
|
||||
SELECT '--JSONKey--';
|
||||
SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', 1);
|
||||
SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', 2);
|
||||
SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', -1);
|
||||
SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2);
|
||||
|
||||
SELECT '--JSONType--';
|
||||
SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
|
||||
SELECT '--JSONExtract<numeric>--';
|
||||
SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);
|
||||
SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2);
|
||||
SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1);
|
||||
SELECT JSONExtractBool('{"passed": true}', 'passed');
|
||||
SELECT JSONExtractBool('"HX-=');
|
||||
|
||||
SELECT '--JSONExtractString--';
|
||||
SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1);
|
||||
select JSONExtractString('{"abc":"\\n\\u0000"}', 'abc');
|
||||
select JSONExtractString('{"abc":"\\u263a"}', 'abc');
|
||||
select JSONExtractString('{"abc":"\\u263"}', 'abc');
|
||||
select JSONExtractString('{"abc":"hello}', 'abc');
|
||||
|
||||
SELECT '--JSONExtract (generic)--';
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, Array(Float64))');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(a String, b Array(Float64))');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(b Array(Float64), a String)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(a FixedString(6), c UInt8)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'a', 'String');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float32)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Tuple(Int8, Float32, UInt16)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Int8)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(UInt8)');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(UInt8))');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1, 'Int8');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2, 'Int32');
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)');
|
||||
SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8');
|
||||
SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)');
|
||||
SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)');
|
||||
|
||||
SELECT '--JSONExtractKeysAndValues--';
|
||||
SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String');
|
||||
SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Float64)');
|
||||
SELECT JSONExtractKeysAndValues('{"a": "hello", "b": "world"}', 'String');
|
||||
SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8');
|
||||
|
||||
SELECT '--JSONExtractRaw--';
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}', 'c');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}', 'c', 'd');
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}', 'c', 'd', 2);
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}', 'c', 'd', 3);
|
||||
SELECT JSONExtractRaw('{"passed": true}');
|
||||
SELECT JSONExtractRaw('{}');
|
||||
select JSONExtractRaw('{"abc":"\\n\\u0000"}', 'abc');
|
||||
select JSONExtractRaw('{"abc":"\\u263a"}', 'abc');
|
@ -1,16 +0,0 @@
|
||||
4
|
||||
Object
|
||||
1
|
||||
1
|
||||
a
|
||||
hello
|
||||
hello
|
||||
3
|
||||
Array
|
||||
-100
|
||||
200
|
||||
300
|
||||
('a','hello','b',[-100,200,300])
|
||||
[-100,NULL,300]
|
||||
['a','hello','b',NULL]
|
||||
[(NULL,NULL,NULL),(NULL,NULL,NULL),(NULL,NULL,NULL),(-100,200,44)]
|
@ -1,16 +0,0 @@
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1);
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 2);
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);
|
||||
select jsonExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2);
|
||||
select jsonExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1);
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, String, String, Array(Float64))');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Int32)', 'b');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(String)');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Tuple(Int16, Float32, UInt8))');
|
@ -60,98 +60,138 @@ There is currently no support for code points in the format `\uXXXX\uYYYY` that
|
||||
|
||||
The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. The assumption 2 mentioned above still applies.
|
||||
|
||||
## jsonHas(params[, accessors]...)
|
||||
## JSONHas(json[, indices_or_keys]...)
|
||||
|
||||
If the value exists in the JSON document, `1` will be returned.
|
||||
|
||||
If the value does not exist, `null` will be returned.
|
||||
If the value does not exist, `0` will be returned.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 1
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4) = null
|
||||
select JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 1
|
||||
select JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4) = 0
|
||||
```
|
||||
|
||||
An accessor can be either a string, a positive integer or a negative integer.
|
||||
`indices_or_keys` is a list of zero or more arguments each of them can be either string or integer.
|
||||
|
||||
* String = access object member by key.
|
||||
* Positive integer = access the n-th member/key from the beginning.
|
||||
* Negative integer = access the n-th member/key from the end.
|
||||
|
||||
You may use integers to access both JSON arrays and JSON objects. JSON objects are accessed as an array with the `[key, value, key, value, ...]` layout.
|
||||
You may use integers to access both JSON arrays and JSON objects.
|
||||
|
||||
So, for example:
|
||||
|
||||
```
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'a'
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 2) = 'hello'
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'b'
|
||||
select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'a'
|
||||
select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', 2) = 'b'
|
||||
select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -1) = 'b'
|
||||
select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a'
|
||||
select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello'
|
||||
```
|
||||
|
||||
## jsonLength(params[, accessors]...)
|
||||
## JSONLength(json[, indices_or_keys]...)
|
||||
|
||||
Return the length of a JSON array or a JSON object. For JSON objects, both keys and values are included.
|
||||
Return the length of a JSON array or a JSON object.
|
||||
|
||||
If the value does not exist or has a wrong type, `null` will be returned.
|
||||
If the value does not exist or has a wrong type, `0` will be returned.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 4
|
||||
select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3
|
||||
select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2
|
||||
```
|
||||
|
||||
The usage of accessors is the same as above.
|
||||
|
||||
## jsonType(params[, accessors]...)
|
||||
## JSONType(json[, indices_or_keys]...)
|
||||
|
||||
Return the type of a JSON value.
|
||||
|
||||
If the value does not exist, `null` will be returned.
|
||||
If the value does not exist, `Null` will be returned.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}') = 'Object'
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String'
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array'
|
||||
select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}') = 'Object'
|
||||
select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String'
|
||||
select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array'
|
||||
```
|
||||
|
||||
The usage of accessors is the same as above.
|
||||
## JSONExtractUInt(json[, indices_or_keys]...)
|
||||
## JSONExtractInt(json[, indices_or_keys]...)
|
||||
## JSONExtractFloat(json[, indices_or_keys]...)
|
||||
## JSONExtractBool(json[, indices_or_keys]...)
|
||||
|
||||
## jsonExtractUInt(params[, accessors]...)
|
||||
## jsonExtractInt(params[, accessors]...)
|
||||
## jsonExtractFloat(params[, accessors]...)
|
||||
## jsonExtractBool(params[, accessors]...)
|
||||
## jsonExtractString(params[, accessors]...)
|
||||
Parses a JSON and extract a value. These functions are similar to `visitParam` functions.
|
||||
|
||||
Parse data from JSON values which is similar to `visitParam` functions.
|
||||
|
||||
If the value does not exist or has a wrong type, `null` will be returned.
|
||||
If the value does not exist or has a wrong type, `0` will be returned.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'hello'
|
||||
select jsonExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100
|
||||
select jsonExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0
|
||||
select jsonExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300
|
||||
select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100
|
||||
select JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0
|
||||
select JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300
|
||||
```
|
||||
|
||||
The usage of accessors is the same as above.
|
||||
## JSONExtractString(json[, indices_or_keys]...)
|
||||
|
||||
## jsonExtract(params, type[, accessors]...)
|
||||
Parses a JSON and extract a string. This function is similar to `visitParamExtractString` functions.
|
||||
|
||||
Parse data from JSON values with a given ClickHouse data type.
|
||||
If the value does not exist or has a wrong type, an empty string will be returned.
|
||||
|
||||
If the value does not exist or has a wrong type, `null` will be returned.
|
||||
The value is unescaped. If unescaping failed, it returns an empty string.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Int8', 'b', 1) = -100
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, String, String, Array(Float64))') = ('a', 'hello', 'b', [-100.0, 200.0, 300.0])
|
||||
select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'hello'
|
||||
select JSONExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
|
||||
select JSONExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
|
||||
select JSONExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
select JSONExtractString('{"abc":"hello}', 'abc') = ''
|
||||
```
|
||||
|
||||
The usage of accessors is the same as above.
|
||||
## JSONExtract(json[, indices_or_keys...], return_type)
|
||||
|
||||
Parses a JSON and extract a value of the given ClickHouse data type.
|
||||
|
||||
This is a generalization of the previous `JSONExtract<type>` functions.
|
||||
This means
|
||||
`JSONExtract(..., 'String')` returns exactly the same as `JSONExtractString()`,
|
||||
`JSONExtract(..., 'Float64')` returns exactly the same as `JSONExtractFloat()`.
|
||||
|
||||
Examples:
|
||||
|
||||
```
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, Array(Float64))') = ('hello',[-100,200,300])
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(b Array(Float64), a String)') = ([-100,200,300],'hello')
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))') = [-100, NULL, NULL]
|
||||
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)') = NULL
|
||||
SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8') = 1
|
||||
SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Thursday'
|
||||
SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday'
|
||||
```
|
||||
|
||||
## JSONExtractKeysAndValues(json[, indices_or_keys...], value_type)
|
||||
|
||||
Parse key-value pairs from a JSON where the values are of the given ClickHouse data type.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)];
|
||||
```
|
||||
|
||||
## JSONExtractRaw(json[, indices_or_keys]...)
|
||||
|
||||
Returns a part of JSON.
|
||||
|
||||
If the part does not exist or has a wrong type, an empty string will be returned.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
select JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user