2023-09-06 16:11:16 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <Functions/IFunction.h>
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <IO/ReadBufferFromString.h>
|
|
|
|
#include <Common/FieldVisitorToString.h>
|
2023-09-07 02:10:33 +00:00
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
#if USE_RAPIDJSON
|
2023-09-06 16:11:16 +00:00
|
|
|
|
|
|
|
#include "rapidjson/document.h"
|
|
|
|
#include "rapidjson/writer.h"
|
|
|
|
#include "rapidjson/stringbuffer.h"
|
|
|
|
#include "rapidjson/filewritestream.h"
|
|
|
|
#include "rapidjson/prettywriter.h"
|
|
|
|
#include "rapidjson/filereadstream.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2023-10-25 15:08:27 +00:00
|
|
|
|
2023-09-06 16:11:16 +00:00
|
|
|
/// Error codes thrown by the function below; they are only declared here (extern).
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
2023-10-10 06:51:51 +00:00
|
|
|
/// Example:
///   SELECT jsonMergePatch('{"a":1}', '{"name": "joey"}', '{"name": "tom"}', '{"name": "zoey"}');
/// returns
///   ┌───────────────────────┐
///   │ {"a":1,"name":"zoey"} │
///   └───────────────────────┘
|
2023-10-10 06:51:51 +00:00
|
|
|
class FunctionjsonMergePatch : public IFunction
|
2023-09-06 16:11:16 +00:00
|
|
|
{
|
|
|
|
public:
|
2023-10-10 06:51:51 +00:00
|
|
|
static constexpr auto name = "jsonMergePatch";
|
|
|
|
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionjsonMergePatch>(); }
|
2023-09-06 16:11:16 +00:00
|
|
|
|
|
|
|
String getName() const override { return name; }
|
|
|
|
bool isVariadic() const override { return true; }
|
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override { return 0; }
|
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
|
|
|
if (arguments.empty())
|
|
|
|
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
|
|
|
|
|
2023-10-25 15:08:27 +00:00
|
|
|
for (const auto & arg : arguments)
|
|
|
|
if (!isString(arg.type))
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} requires string arguments", getName());
|
|
|
|
|
2023-09-06 16:11:16 +00:00
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
|
|
|
{
|
2023-10-25 15:08:27 +00:00
|
|
|
chassert(!arguments.empty());
|
|
|
|
|
|
|
|
rapidjson::Document::AllocatorType allocator;
|
|
|
|
std::function<void(rapidjson::Value &, const rapidjson::Value &)> merge_objects;
|
2023-09-06 16:11:16 +00:00
|
|
|
|
2023-10-25 15:08:27 +00:00
|
|
|
merge_objects = [&merge_objects, &allocator](rapidjson::Value & dest, const rapidjson::Value & src) -> void
|
2023-10-10 06:51:51 +00:00
|
|
|
{
|
|
|
|
if (!src.IsObject())
|
|
|
|
return;
|
2023-10-25 15:08:27 +00:00
|
|
|
|
2023-10-10 06:51:51 +00:00
|
|
|
for (auto it = src.MemberBegin(); it != src.MemberEnd(); ++it)
|
|
|
|
{
|
|
|
|
rapidjson::Value key(it->name, allocator);
|
|
|
|
rapidjson::Value value(it->value, allocator);
|
|
|
|
if (dest.HasMember(key))
|
|
|
|
{
|
|
|
|
if (dest[key].IsObject() && value.IsObject())
|
2023-10-25 15:08:27 +00:00
|
|
|
merge_objects(dest[key], value);
|
2023-10-10 06:51:51 +00:00
|
|
|
else
|
|
|
|
dest[key] = value;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
dest.AddMember(key, value, allocator);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2023-12-02 23:46:31 +00:00
|
|
|
auto parse_json_document = [](const IColumn & column, rapidjson::Document & document, size_t i)
|
2023-09-06 16:11:16 +00:00
|
|
|
{
|
2023-10-25 15:08:27 +00:00
|
|
|
auto str_ref = column.getDataAt(i);
|
|
|
|
const char * json = str_ref.data;
|
|
|
|
|
|
|
|
document.Parse(json);
|
|
|
|
if (document.HasParseError() || !document.IsObject())
|
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong JSON string to merge. Expected JSON object");
|
|
|
|
};
|
|
|
|
|
2023-12-02 23:46:31 +00:00
|
|
|
const auto * first_column_arg = arguments[0].column.get();
|
2023-12-04 17:27:10 +00:00
|
|
|
const auto * first_column_arg_string = checkAndGetColumn<ColumnString>(first_column_arg);
|
|
|
|
const auto * first_column_arg_const = checkAndGetColumnConst<ColumnString>(first_column_arg);
|
2023-11-30 04:14:42 +00:00
|
|
|
|
2023-12-02 23:46:31 +00:00
|
|
|
if (!first_column_arg_string && !first_column_arg_const)
|
2023-10-25 15:08:27 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName());
|
|
|
|
|
|
|
|
std::vector<rapidjson::Document> merged_jsons;
|
|
|
|
merged_jsons.reserve(input_rows_count);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < input_rows_count; ++i)
|
|
|
|
{
|
|
|
|
auto & merged_json = merged_jsons.emplace_back(rapidjson::Type::kObjectType, &allocator);
|
2023-12-02 23:46:31 +00:00
|
|
|
parse_json_document(*first_column_arg, merged_json, i);
|
2023-10-25 15:08:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t col_idx = 1; col_idx < arguments.size(); ++col_idx)
|
|
|
|
{
|
2023-12-02 23:46:31 +00:00
|
|
|
const auto * column_arg = arguments[col_idx].column.get();
|
2023-12-04 17:27:10 +00:00
|
|
|
const auto * column_arg_string = checkAndGetColumn<ColumnString>(column_arg);
|
|
|
|
const auto * column_arg_const = checkAndGetColumnConst<ColumnString>(column_arg);
|
2023-11-30 04:14:42 +00:00
|
|
|
|
2023-12-02 23:46:31 +00:00
|
|
|
if (!column_arg_string && !column_arg_const)
|
2023-10-25 15:08:27 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName());
|
2023-09-06 16:11:16 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < input_rows_count; ++i)
|
|
|
|
{
|
2023-10-25 15:08:27 +00:00
|
|
|
rapidjson::Document document(&allocator);
|
2023-12-02 23:46:31 +00:00
|
|
|
parse_json_document(*column_arg, document, i);
|
2023-10-25 15:08:27 +00:00
|
|
|
merge_objects(merged_jsons[i], document);
|
2023-09-06 16:11:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-25 15:08:27 +00:00
|
|
|
auto result = ColumnString::create();
|
|
|
|
auto & result_string = assert_cast<ColumnString &>(*result);
|
|
|
|
rapidjson::CrtAllocator buffer_allocator;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < input_rows_count; ++i)
|
|
|
|
{
|
|
|
|
rapidjson::StringBuffer buffer(&buffer_allocator);
|
|
|
|
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
|
2023-09-06 16:11:16 +00:00
|
|
|
|
2023-10-25 15:08:27 +00:00
|
|
|
merged_jsons[i].Accept(writer);
|
|
|
|
result_string.insertData(buffer.GetString(), buffer.GetSize());
|
|
|
|
}
|
2023-09-06 16:11:16 +00:00
|
|
|
|
2023-10-25 15:08:27 +00:00
|
|
|
return result;
|
2023-09-06 16:11:16 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2023-10-10 06:51:51 +00:00
|
|
|
/// Registers jsonMergePatch in the function factory together with its user-facing description.
REGISTER_FUNCTION(jsonMergePatch)
{
    FunctionDocumentation documentation{
        .description="Returns the merged JSON object string, which is formed by merging multiple JSON objects."};

    factory.registerFunction<FunctionjsonMergePatch>(documentation);
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|