2021-03-24 19:47:28 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-06-04 22:31:55 +00:00
|
|
|
#include <sstream>
|
2021-03-24 19:47:28 +00:00
|
|
|
#include <type_traits>
|
2021-05-15 10:10:19 +00:00
|
|
|
#include <Columns/ColumnConst.h>
|
2021-06-04 22:31:55 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
2021-05-29 12:34:39 +00:00
|
|
|
#include <Columns/ColumnsNumber.h>
|
2021-03-24 19:47:28 +00:00
|
|
|
#include <Core/Settings.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
2021-05-15 10:10:19 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2021-03-24 19:47:28 +00:00
|
|
|
#include <Functions/DummyJSONParser.h>
|
2021-06-04 22:31:55 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2021-03-24 19:47:28 +00:00
|
|
|
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
|
2021-07-04 09:10:16 +00:00
|
|
|
#include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
|
2021-03-24 19:47:28 +00:00
|
|
|
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
|
|
|
|
#include <Functions/RapidJSONParser.h>
|
|
|
|
#include <Functions/SimdJSONParser.h>
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Parsers/IParser.h>
|
|
|
|
#include <Parsers/Lexer.h>
|
2021-06-17 11:48:05 +00:00
|
|
|
#include <common/range.h>
|
2021-03-24 19:47:28 +00:00
|
|
|
|
|
|
|
#if !defined(ARCADIA_BUILD)
|
|
|
|
# include "config_functions.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes raised by the SQL/JSON helpers in this header.
/// They are only declared here; the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;                     /// thrown when the JSONPath cannot be parsed
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;          /// thrown for a non-String / non-const argument
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;    /// thrown when fewer than 2 arguments are passed
}
|
|
|
|
|
|
|
|
class FunctionSQLJSONHelpers
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
template <typename Name, template <typename> typename Impl, class JSONParser>
|
|
|
|
class Executor
|
|
|
|
{
|
|
|
|
public:
|
2021-06-25 16:24:22 +00:00
|
|
|
static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth)
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
|
|
|
MutableColumnPtr to{result_type->createColumn()};
|
|
|
|
to->reserve(input_rows_count);
|
|
|
|
|
|
|
|
if (arguments.size() < 2)
|
|
|
|
{
|
|
|
|
throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
|
|
|
|
}
|
|
|
|
|
|
|
|
const auto & first_column = arguments[0];
|
2021-06-05 12:27:37 +00:00
|
|
|
|
|
|
|
/// Check 1 argument: must be of type String (JSONPath)
|
2021-03-24 19:47:28 +00:00
|
|
|
if (!isString(first_column.type))
|
|
|
|
{
|
2021-06-05 12:13:46 +00:00
|
|
|
throw Exception(
|
2021-03-24 19:47:28 +00:00
|
|
|
"JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(),
|
2021-06-05 12:13:46 +00:00
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2021-03-24 19:47:28 +00:00
|
|
|
}
|
2021-06-05 12:27:37 +00:00
|
|
|
/// Check 1 argument: must be const (JSONPath)
|
|
|
|
if (!isColumnConst(*first_column.column))
|
|
|
|
{
|
|
|
|
throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
}
|
2021-03-24 19:47:28 +00:00
|
|
|
|
|
|
|
const auto & second_column = arguments[1];
|
2021-06-05 12:27:37 +00:00
|
|
|
|
|
|
|
/// Check 2 argument: must be of type String (JSON)
|
2021-03-24 19:47:28 +00:00
|
|
|
if (!isString(second_column.type))
|
|
|
|
{
|
2021-06-05 12:13:46 +00:00
|
|
|
throw Exception(
|
2021-03-24 19:47:28 +00:00
|
|
|
"JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(),
|
2021-06-05 12:13:46 +00:00
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2021-03-24 19:47:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
const ColumnPtr & arg_jsonpath = first_column.column;
|
|
|
|
const auto * arg_jsonpath_const = typeid_cast<const ColumnConst *>(arg_jsonpath.get());
|
|
|
|
const auto * arg_jsonpath_string = typeid_cast<const ColumnString *>(arg_jsonpath_const->getDataColumnPtr().get());
|
2021-06-05 12:27:37 +00:00
|
|
|
|
2021-03-24 19:47:28 +00:00
|
|
|
const ColumnPtr & arg_json = second_column.column;
|
|
|
|
const auto * col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
|
|
|
|
const auto * col_json_string
|
|
|
|
= typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());
|
|
|
|
|
|
|
|
/// Get data and offsets for 1 argument (JSONPath)
|
|
|
|
const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars();
|
|
|
|
const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets();
|
|
|
|
|
2021-06-25 16:24:22 +00:00
|
|
|
/// Prepare to parse 1 argument (JSONPath)
|
2021-03-24 19:47:28 +00:00
|
|
|
const char * query_begin = reinterpret_cast<const char *>(&chars_path[0]);
|
|
|
|
const char * query_end = query_begin + offsets_path[0] - 1;
|
|
|
|
|
|
|
|
/// Tokenize query
|
|
|
|
Tokens tokens(query_begin, query_end);
|
|
|
|
/// Max depth 0 indicates that depth is not limited
|
2021-06-25 16:24:22 +00:00
|
|
|
IParser::Pos token_iterator(tokens, parse_depth);
|
2021-03-24 19:47:28 +00:00
|
|
|
|
|
|
|
/// Parse query and create AST tree
|
|
|
|
Expected expected;
|
|
|
|
ASTPtr res;
|
|
|
|
ParserJSONPath parser;
|
|
|
|
const bool parse_res = parser.parse(token_iterator, res, expected);
|
|
|
|
if (!parse_res)
|
|
|
|
{
|
|
|
|
throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS};
|
|
|
|
}
|
|
|
|
|
2021-06-25 16:24:22 +00:00
|
|
|
/// Get data and offsets for 2 argument (JSON)
|
2021-03-24 19:47:28 +00:00
|
|
|
const ColumnString::Chars & chars_json = col_json_string->getChars();
|
|
|
|
const ColumnString::Offsets & offsets_json = col_json_string->getOffsets();
|
|
|
|
|
|
|
|
JSONParser json_parser;
|
|
|
|
using Element = typename JSONParser::Element;
|
|
|
|
Element document;
|
|
|
|
bool document_ok = false;
|
|
|
|
|
|
|
|
/// Parse JSON for every row
|
|
|
|
Impl<JSONParser> impl;
|
2021-05-15 10:10:19 +00:00
|
|
|
|
2021-06-17 11:48:05 +00:00
|
|
|
for (const auto i : collections::range(0, input_rows_count))
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
|
|
|
std::string_view json{
|
|
|
|
reinterpret_cast<const char *>(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1};
|
|
|
|
document_ok = json_parser.parse(json, document);
|
|
|
|
|
|
|
|
bool added_to_column = false;
|
|
|
|
if (document_ok)
|
|
|
|
{
|
|
|
|
added_to_column = impl.insertResultToColumn(*to, document, res);
|
|
|
|
}
|
|
|
|
if (!added_to_column)
|
|
|
|
{
|
|
|
|
to->insertDefault();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return to;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename Name, template <typename> typename Impl>
|
2021-06-03 17:47:53 +00:00
|
|
|
class FunctionSQLJSON : public IFunction, WithConstContext
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-07-06 10:08:09 +00:00
|
|
|
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionSQLJSON>(context_); }
|
|
|
|
explicit FunctionSQLJSON(ContextPtr context_) : WithConstContext(context_) { }
|
2021-03-24 19:47:28 +00:00
|
|
|
|
|
|
|
static constexpr auto name = Name::name;
|
|
|
|
String getName() const override { return Name::name; }
|
|
|
|
bool isVariadic() const override { return true; }
|
|
|
|
size_t getNumberOfArguments() const override { return 0; }
|
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
2021-06-05 12:27:37 +00:00
|
|
|
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
|
2021-03-24 19:47:28 +00:00
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
|
|
|
return Impl<DummyJSONParser>::getReturnType(Name::name, arguments);
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
|
|
|
{
|
|
|
|
/// Choose JSONParser.
|
|
|
|
/// 1. Lexer(path) -> Tokens
|
|
|
|
/// 2. Create ASTPtr
|
|
|
|
/// 3. Parser(Tokens, ASTPtr) -> complete AST
|
2021-06-25 16:24:22 +00:00
|
|
|
/// 4. Execute functions: call getNextItem on generator and handle each item
|
|
|
|
uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth;
|
2021-03-24 19:47:28 +00:00
|
|
|
#if USE_SIMDJSON
|
2021-06-03 17:47:53 +00:00
|
|
|
if (getContext()->getSettingsRef().allow_simdjson)
|
2021-06-25 16:24:22 +00:00
|
|
|
return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
|
2021-03-24 19:47:28 +00:00
|
|
|
#endif
|
2021-06-25 16:24:22 +00:00
|
|
|
return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
|
2021-03-24 19:47:28 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-05-15 10:10:19 +00:00
|
|
|
/// Name tag for the JSON_EXISTS function.
struct NameJSONExists
{
    static constexpr auto name = "JSON_EXISTS";
};
|
|
|
|
|
2021-05-15 10:10:19 +00:00
|
|
|
/// Name tag for the JSON_VALUE function.
struct NameJSONValue
{
    static constexpr auto name = "JSON_VALUE";
};
|
|
|
|
|
|
|
|
/// Name tag for the JSON_QUERY function.
struct NameJSONQuery
{
    static constexpr auto name = "JSON_QUERY";
};
|
|
|
|
|
|
|
|
template <typename JSONParser>
|
2021-05-15 10:10:19 +00:00
|
|
|
class JSONExistsImpl
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Element = typename JSONParser::Element;
|
|
|
|
|
|
|
|
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
|
|
|
|
|
|
|
|
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
|
|
|
|
|
|
|
|
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
|
|
|
|
{
|
|
|
|
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
|
|
|
Element current_element = root;
|
|
|
|
VisitorStatus status;
|
|
|
|
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
|
|
|
{
|
2021-06-04 22:31:55 +00:00
|
|
|
if (status == VisitorStatus::Ok)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
current_element = root;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// insert result, status can be either Ok (if we found the item)
|
|
|
|
/// or Exhausted (if we never found the item)
|
|
|
|
ColumnUInt8 & col_bool = assert_cast<ColumnUInt8 &>(dest);
|
2021-06-04 22:31:55 +00:00
|
|
|
if (status == VisitorStatus::Ok)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
col_bool.insert(1);
|
2021-06-04 22:31:55 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-06-03 17:47:53 +00:00
|
|
|
col_bool.insert(0);
|
2021-05-15 10:10:19 +00:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename JSONParser>
|
|
|
|
class JSONValueImpl
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Element = typename JSONParser::Element;
|
|
|
|
|
|
|
|
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }
|
|
|
|
|
|
|
|
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
|
|
|
|
|
2021-05-15 10:10:19 +00:00
|
|
|
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
|
|
|
Element current_element = root;
|
|
|
|
VisitorStatus status;
|
|
|
|
Element res;
|
|
|
|
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
|
|
|
{
|
2021-06-04 22:31:55 +00:00
|
|
|
if (status == VisitorStatus::Ok)
|
|
|
|
{
|
|
|
|
if (!(current_element.isArray() || current_element.isObject()))
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
break;
|
|
|
|
}
|
2021-06-04 22:31:55 +00:00
|
|
|
}
|
|
|
|
else if (status == VisitorStatus::Error)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
/// ON ERROR
|
2021-07-04 09:10:16 +00:00
|
|
|
/// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
|
|
|
|
/// however this functionality is not implemented yet
|
2021-05-15 10:10:19 +00:00
|
|
|
}
|
|
|
|
current_element = root;
|
|
|
|
}
|
|
|
|
|
2021-06-04 22:31:55 +00:00
|
|
|
if (status == VisitorStatus::Exhausted)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-06-25 15:33:31 +00:00
|
|
|
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
|
|
|
out << current_element.getElement();
|
|
|
|
auto output_str = out.str();
|
2021-03-24 19:47:28 +00:00
|
|
|
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
2021-06-25 15:33:31 +00:00
|
|
|
col_str.insertData(output_str.data(), output_str.size());
|
2021-03-24 19:47:28 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Function to test jsonpath member access, will be removed in final PR
|
|
|
|
* @tparam JSONParser parser
|
|
|
|
*/
|
|
|
|
template <typename JSONParser>
|
2021-05-15 10:10:19 +00:00
|
|
|
class JSONQueryImpl
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Element = typename JSONParser::Element;
|
|
|
|
|
|
|
|
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }
|
|
|
|
|
|
|
|
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
|
|
|
|
|
|
|
|
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
|
|
|
|
{
|
|
|
|
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
|
|
|
Element current_element = root;
|
|
|
|
VisitorStatus status;
|
2021-06-25 15:33:31 +00:00
|
|
|
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
2021-05-15 10:10:19 +00:00
|
|
|
/// Create json array of results: [res1, res2, ...]
|
|
|
|
out << "[";
|
|
|
|
bool success = false;
|
|
|
|
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
2021-03-24 19:47:28 +00:00
|
|
|
{
|
2021-06-04 22:31:55 +00:00
|
|
|
if (status == VisitorStatus::Ok)
|
|
|
|
{
|
|
|
|
if (success)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
out << ", ";
|
|
|
|
}
|
|
|
|
success = true;
|
2021-06-25 15:33:31 +00:00
|
|
|
out << current_element.getElement();
|
2021-06-04 22:31:55 +00:00
|
|
|
}
|
|
|
|
else if (status == VisitorStatus::Error)
|
|
|
|
{
|
2021-05-15 10:10:19 +00:00
|
|
|
/// ON ERROR
|
2021-06-25 16:24:22 +00:00
|
|
|
/// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
|
|
|
|
/// however this functionality is not implemented yet
|
2021-05-15 10:10:19 +00:00
|
|
|
}
|
|
|
|
current_element = root;
|
2021-03-24 19:47:28 +00:00
|
|
|
}
|
2021-05-15 10:10:19 +00:00
|
|
|
out << "]";
|
2021-06-04 22:31:55 +00:00
|
|
|
if (!success)
|
|
|
|
{
|
2021-03-24 19:47:28 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
2021-06-25 15:33:31 +00:00
|
|
|
auto output_str = out.str();
|
|
|
|
col_str.insertData(output_str.data(), output_str.size());
|
2021-03-24 19:47:28 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|