ClickHouse/dbms/src/Functions/FunctionsJSON.h

252 lines
7.9 KiB
C++
Raw Normal View History

2019-03-14 02:55:04 +00:00
#pragma once
#include <Functions/IFunction.h>
#include <Common/config.h>
#if USE_SIMDJSON
2019-03-14 02:55:04 +00:00
#include <Columns/ColumnConst.h>
2019-03-14 08:07:25 +00:00
#include <Columns/ColumnString.h>
2019-03-14 05:48:29 +00:00
#include <DataTypes/DataTypeFactory.h>
2019-03-14 08:30:15 +00:00
#include <Common/typeid_cast.h>
2019-04-26 21:58:14 +00:00
#include <ext/range.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic ignored "-Wnewline-eof"
2019-04-26 21:58:14 +00:00
#endif
2019-03-14 02:55:04 +00:00
#include <simdjson/jsonparser.h>
2019-04-26 21:58:14 +00:00
#ifdef __clang__
#pragma clang diagnostic pop
#endif
2019-03-14 02:55:04 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Error codes thrown by FunctionJSONBase; only declared here (extern), the definitions live elsewhere.
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename Impl, bool ExtraArg>
class FunctionJSONBase : public IFunction
{
private:
enum class Action
{
key = 1,
index = 2,
};
mutable std::vector<Action> actions;
mutable DataTypePtr virtual_type;
2019-03-14 08:30:15 +00:00
bool tryMove(ParsedJson::iterator & pjh, Action action, const Field & accessor)
2019-03-14 02:55:04 +00:00
{
switch (action)
{
case Action::key:
2019-03-14 08:30:15 +00:00
if (!pjh.is_object() || !pjh.move_to_key(accessor.get<String>().data()))
2019-03-14 02:55:04 +00:00
return false;
break;
case Action::index:
2019-03-14 08:30:15 +00:00
if (!pjh.is_object_or_array() || !pjh.down())
2019-03-14 02:55:04 +00:00
return false;
int index = accessor.get<Int64>();
size_t steps;
if (index > 0)
2019-03-14 02:55:04 +00:00
{
if (pjh.get_scope_type() == '{')
steps = index * 2 - 1;
else
steps = index - 1;
}
else if (index < 0)
{
size_t steps_to_end = 0;
2019-03-14 08:30:15 +00:00
ParsedJson::iterator pjh1{pjh};
2019-03-14 02:55:04 +00:00
while (pjh1.next())
++steps_to_end;
if (pjh.get_scope_type() == '{')
steps = index * 2 + steps_to_end + 2;
else
steps = index + steps_to_end + 1;
2019-03-14 02:55:04 +00:00
}
else
return false;
for (const auto i : ext::range(0, steps))
{
2019-03-14 08:30:15 +00:00
(void)i;
2019-03-14 02:55:04 +00:00
if (!pjh.next())
return false;
}
break;
}
return true;
}
public:
static constexpr auto name = Impl::name;
2019-03-14 08:30:15 +00:00
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJSONBase>(); }
2019-03-14 02:55:04 +00:00
2019-03-14 08:30:15 +00:00
String getName() const override { return Impl::name; }
2019-03-14 02:55:04 +00:00
2019-03-14 08:30:15 +00:00
bool isVariadic() const override { return true; }
2019-03-14 02:55:04 +00:00
2019-03-14 08:30:15 +00:00
size_t getNumberOfArguments() const override { return 0; }
2019-03-14 02:55:04 +00:00
2019-03-14 08:30:15 +00:00
bool useDefaultImplementationForConstants() const override { return true; }
2019-03-14 02:55:04 +00:00
2019-03-14 08:30:15 +00:00
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
2019-03-14 02:55:04 +00:00
{
if constexpr (ExtraArg)
{
if (arguments.size() < 2)
2019-03-14 08:30:15 +00:00
throw Exception{"Function " + getName() + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
2019-03-14 02:55:04 +00:00
auto col_type_const = typeid_cast<const ColumnConst *>(arguments[arguments.size() - 1].column.get());
2019-03-14 05:48:29 +00:00
2019-03-14 06:19:21 +00:00
if (!col_type_const)
throw Exception{"Illegal non-const column " + arguments[arguments.size() - 1].column->getName() + " of argument of function " + getName(),
2019-03-14 08:30:15 +00:00
ErrorCodes::ILLEGAL_COLUMN};
virtual_type = DataTypeFactory::instance().get(col_type_const->getValue<String>());
2019-03-14 02:55:04 +00:00
}
else
{
if (arguments.size() < 1)
2019-03-14 08:30:15 +00:00
throw Exception{"Function " + getName() + " requires at least one arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
2019-03-14 02:55:04 +00:00
}
2019-03-14 05:48:29 +00:00
if (!isString(arguments[0].type))
2019-03-14 08:30:15 +00:00
throw Exception{"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
2019-03-14 02:55:04 +00:00
actions.reserve(arguments.size() - 1 - ExtraArg);
for (const auto i : ext::range(1, arguments.size() - ExtraArg))
2019-03-14 02:55:04 +00:00
{
2019-03-14 05:48:29 +00:00
if (isString(arguments[i].type))
2019-03-14 02:55:04 +00:00
actions.push_back(Action::key);
2019-03-14 05:48:29 +00:00
else if (isInteger(arguments[i].type))
2019-03-14 02:55:04 +00:00
actions.push_back(Action::index);
else
2019-03-14 08:30:15 +00:00
throw Exception{"Illegal type " + arguments[i].type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
2019-03-14 02:55:04 +00:00
}
if constexpr (ExtraArg)
return Impl::getType(virtual_type);
else
return Impl::getType();
}
2019-03-14 08:30:15 +00:00
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count) override
2019-03-14 02:55:04 +00:00
{
2019-03-14 08:30:15 +00:00
MutableColumnPtr to{block.getByPosition(result_pos).type->createColumn()};
2019-03-14 02:55:04 +00:00
to->reserve(input_rows_count);
const ColumnPtr & arg_json = block.getByPosition(arguments[0]).column;
2019-03-14 08:30:15 +00:00
auto col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
2019-03-14 08:07:25 +00:00
2019-03-14 08:30:15 +00:00
auto col_json_string
= typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());
2019-03-14 08:07:25 +00:00
if (!col_json_string)
2019-03-14 08:30:15 +00:00
throw Exception{"Illegal column " + arg_json->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
2019-03-14 08:07:25 +00:00
const ColumnString::Chars & chars = col_json_string->getChars();
const ColumnString::Offsets & offsets = col_json_string->getOffsets();
size_t max_size = 1;
2019-03-14 02:55:04 +00:00
for (const auto i : ext::range(0, input_rows_count))
2019-03-14 08:07:25 +00:00
if (max_size < offsets[i] - offsets[i - 1] - 1)
max_size = offsets[i] - offsets[i - 1] - 1;
ParsedJson pj;
if (!pj.allocateCapacity(max_size))
2019-03-14 08:30:15 +00:00
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
2019-03-14 08:07:25 +00:00
for (const auto i : ext::range(0, input_rows_count))
{
2019-03-14 08:30:15 +00:00
bool ok = json_parse(&chars[offsets[i - 1]], offsets[i] - offsets[i - 1] - 1, pj) == 0;
2019-03-14 08:07:25 +00:00
2019-03-14 08:30:15 +00:00
ParsedJson::iterator pjh{pj};
2019-03-14 02:55:04 +00:00
for (const auto j : ext::range(0, actions.size()))
{
2019-03-14 08:07:25 +00:00
if (!ok)
break;
ok = tryMove(pjh, actions[j], (*block.getByPosition(arguments[j + 1]).column)[i]);
2019-03-14 02:55:04 +00:00
}
if (ok)
{
if constexpr (ExtraArg)
to->insert(Impl::getValue(pjh, virtual_type));
else
to->insert(Impl::getValue(pjh));
}
else
{
if constexpr (ExtraArg)
to->insert(Impl::getDefault(virtual_type));
else
to->insert(Impl::getDefault());
}
}
block.getByPosition(result_pos).column = std::move(to);
}
};
}
#endif
namespace DB
{
namespace ErrorCodes
{
/// Only declared here (extern); used by FunctionJSONDummy below for every call it rejects.
extern const int NOT_IMPLEMENTED;
}
/** Placeholder with the same name as the real function (Impl::name).
  * It is defined regardless of USE_SIMDJSON; both type deduction and execution
  * always throw NOT_IMPLEMENTED, reporting that the function is not supported without AVX2.
  */
template <typename Impl>
class FunctionJSONDummy : public IFunction
{
public:
    static constexpr auto name = Impl::name;

    static FunctionPtr create(const Context &)
    {
        return std::make_shared<FunctionJSONDummy>();
    }

    String getName() const override
    {
        return Impl::name;
    }

    /// Any number of arguments is accepted so that the call reaches the error below.
    bool isVariadic() const override
    {
        return true;
    }

    size_t getNumberOfArguments() const override
    {
        return 0;
    }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override
    {
        throwNotSupported();
    }

    void executeImpl(Block &, const ColumnNumbers &, size_t, size_t) override
    {
        throwNotSupported();
    }

private:
    /// Single place that builds and throws the "not supported" error.
    [[noreturn]] void throwNotSupported() const
    {
        throw Exception{"Function " + getName() + " is not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED};
    }
};
}