ClickHouse/src/Functions/jsonMergePatch.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

166 lines
6.4 KiB
C++
Raw Normal View History

2023-09-06 16:11:16 +00:00
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromString.h>
#include <Common/FieldVisitorToString.h>
2023-09-07 02:10:33 +00:00
#include "config.h"
#if USE_RAPIDJSON
2023-09-06 16:11:16 +00:00
#include "rapidjson/document.h"
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/prettywriter.h"
#include "rapidjson/filereadstream.h"
namespace DB
{
2023-09-06 16:11:16 +00:00
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here (`extern`); this file does not define them.
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
2023-09-06 16:11:16 +00:00
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
2023-10-25 18:53:58 +00:00
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
2023-09-06 16:11:16 +00:00
}
namespace
{
2023-10-10 06:51:51 +00:00
// Example: for the same key, the value from the rightmost argument wins.
// select jsonMergePatch('{"a":1}','{"name": "joey"}','{"name": "tom"}','{"name": "zoey"}');
//            ||
//            \/
// ┌───────────────────────┐
// │ {"a":1,"name":"zoey"} │
// └───────────────────────┘
2023-10-10 06:51:51 +00:00
class FunctionjsonMergePatch : public IFunction
2023-09-06 16:11:16 +00:00
{
public:
2023-10-10 06:51:51 +00:00
static constexpr auto name = "jsonMergePatch";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionjsonMergePatch>(); }
2023-09-06 16:11:16 +00:00
String getName() const override { return name; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
for (const auto & arg : arguments)
if (!isString(arg.type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} requires string arguments", getName());
2023-09-06 16:11:16 +00:00
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
chassert(!arguments.empty());
rapidjson::Document::AllocatorType allocator;
std::function<void(rapidjson::Value &, const rapidjson::Value &)> merge_objects;
2023-09-06 16:11:16 +00:00
merge_objects = [&merge_objects, &allocator](rapidjson::Value & dest, const rapidjson::Value & src) -> void
2023-10-10 06:51:51 +00:00
{
if (!src.IsObject())
return;
2023-10-10 06:51:51 +00:00
for (auto it = src.MemberBegin(); it != src.MemberEnd(); ++it)
{
rapidjson::Value key(it->name, allocator);
rapidjson::Value value(it->value, allocator);
if (dest.HasMember(key))
{
if (dest[key].IsObject() && value.IsObject())
merge_objects(dest[key], value);
2023-10-10 06:51:51 +00:00
else
dest[key] = value;
}
else
{
dest.AddMember(key, value, allocator);
}
}
};
2023-12-02 23:46:31 +00:00
auto parse_json_document = [](const IColumn & column, rapidjson::Document & document, size_t i)
2023-09-06 16:11:16 +00:00
{
auto str_ref = column.getDataAt(i);
const char * json = str_ref.data;
document.Parse(json);
if (document.HasParseError() || !document.IsObject())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong JSON string to merge. Expected JSON object");
};
2023-12-02 23:46:31 +00:00
const auto * first_column_arg = arguments[0].column.get();
2023-12-04 17:27:10 +00:00
const auto * first_column_arg_string = checkAndGetColumn<ColumnString>(first_column_arg);
const auto * first_column_arg_const = checkAndGetColumnConst<ColumnString>(first_column_arg);
2023-11-30 04:14:42 +00:00
2023-12-02 23:46:31 +00:00
if (!first_column_arg_string && !first_column_arg_const)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName());
std::vector<rapidjson::Document> merged_jsons;
merged_jsons.reserve(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
auto & merged_json = merged_jsons.emplace_back(rapidjson::Type::kObjectType, &allocator);
2023-12-02 23:46:31 +00:00
parse_json_document(*first_column_arg, merged_json, i);
}
for (size_t col_idx = 1; col_idx < arguments.size(); ++col_idx)
{
2023-12-02 23:46:31 +00:00
const auto * column_arg = arguments[col_idx].column.get();
2023-12-04 17:27:10 +00:00
const auto * column_arg_string = checkAndGetColumn<ColumnString>(column_arg);
const auto * column_arg_const = checkAndGetColumnConst<ColumnString>(column_arg);
2023-11-30 04:14:42 +00:00
2023-12-02 23:46:31 +00:00
if (!column_arg_string && !column_arg_const)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName());
2023-09-06 16:11:16 +00:00
for (size_t i = 0; i < input_rows_count; ++i)
{
rapidjson::Document document(&allocator);
2023-12-02 23:46:31 +00:00
parse_json_document(*column_arg, document, i);
merge_objects(merged_jsons[i], document);
2023-09-06 16:11:16 +00:00
}
}
auto result = ColumnString::create();
auto & result_string = assert_cast<ColumnString &>(*result);
rapidjson::CrtAllocator buffer_allocator;
for (size_t i = 0; i < input_rows_count; ++i)
{
rapidjson::StringBuffer buffer(&buffer_allocator);
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2023-09-06 16:11:16 +00:00
merged_jsons[i].Accept(writer);
result_string.insertData(buffer.GetString(), buffer.GetSize());
}
2023-09-06 16:11:16 +00:00
return result;
2023-09-06 16:11:16 +00:00
}
};
}
2023-10-10 06:51:51 +00:00
/// Registers jsonMergePatch in the function factory together with its user-facing description.
REGISTER_FUNCTION(jsonMergePatch)
{
    const char * description_text
        = "Returns the merged JSON object string, which is formed by merging multiple JSON objects.";

    factory.registerFunction<FunctionjsonMergePatch>(FunctionDocumentation{.description = description_text});
}
}
#endif