Merge pull request #24148 from l1tsolaiki/jsonpath

Jsonpath
This commit is contained in:
Kseniia Sumarokova 2021-07-06 10:43:29 +03:00 committed by GitHub
commit d2141ff53b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 1425 additions and 0 deletions

View File

@ -116,6 +116,8 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url)
add_subdirectory(array)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array)
add_subdirectory(JSONPath)
if (USE_STATS)
target_link_libraries(clickhouse_functions PRIVATE stats)
endif()

View File

@ -39,6 +39,8 @@ struct DummyJSONParser
std::string_view getString() const { return {}; }
Array getArray() const { return {}; }
Object getObject() const { return {}; }
Element getElement() { return {}; }
};
/// References an array in a JSON document.
@ -97,4 +99,9 @@ struct DummyJSONParser
#endif
};
/// Stream insertion for the dummy parser's element type.
/// Nothing is written: the stream is handed back untouched.
inline ALWAYS_INLINE std::ostream & operator<<(std::ostream & stream, DummyJSONParser::Element /*element*/)
{
    return stream;
}
}

View File

@ -0,0 +1,15 @@
#include <Functions/FunctionSQLJSON.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
/// Registers the three SQL/JSON functions (JSON_EXISTS, JSON_QUERY, JSON_VALUE) in the given factory.
void registerFunctionsSQLJSON(FunctionFactory & factory)
{
    using FunctionJSONExists = FunctionSQLJSON<NameJSONExists, JSONExistsImpl>;
    using FunctionJSONQuery = FunctionSQLJSON<NameJSONQuery, JSONQueryImpl>;
    using FunctionJSONValue = FunctionSQLJSON<NameJSONValue, JSONValueImpl>;

    factory.registerFunction<FunctionJSONExists>();
    factory.registerFunction<FunctionJSONQuery>();
    factory.registerFunction<FunctionJSONValue>();
}
}

View File

@ -0,0 +1,334 @@
#pragma once
#include <sstream>
#include <type_traits>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/DummyJSONParser.h>
#include <Functions/IFunction.h>
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Functions/RapidJSONParser.h>
#include <Functions/SimdJSONParser.h>
#include <Interpreters/Context.h>
#include <Parsers/IParser.h>
#include <Parsers/Lexer.h>
#include <common/range.h>
#if !defined(ARCADIA_BUILD)
# include "config_functions.h"
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int BAD_ARGUMENTS;
}
/// Shared execution machinery for the SQL/JSON functions.
class FunctionSQLJSONHelpers
{
public:
    /**
      * Runs one SQL/JSON function over a block of rows.
      *
      * @tparam Name        tag type carrying the function name
      * @tparam Impl        per-function logic (provides insertResultToColumn)
      * @tparam JSONParser  parser used to turn each JSON string into a document
      */
    template <typename Name, template <typename> typename Impl, class JSONParser>
    class Executor
    {
    public:
        /**
          * Validates the arguments, parses the (constant) JSONPath once and applies it to the JSON of every row.
          *
          * @param arguments         arguments[0] is the JSONPath (const String), arguments[1] is the JSON (String)
          * @param result_type       type used to create the result column
          * @param input_rows_count  number of rows to produce
          * @param parse_depth       depth limit handed to the JSONPath token iterator
          * @return result column with one value per row; rows whose JSON fails to parse,
          *         or for which Impl inserts nothing, receive the default value
          * @throws Exception TOO_FEW_ARGUMENTS_FOR_FUNCTION, ILLEGAL_TYPE_OF_ARGUMENT or BAD_ARGUMENTS
          */
        static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth)
        {
            MutableColumnPtr to{result_type->createColumn()};
            to->reserve(input_rows_count);

            if (arguments.size() < 2)
            {
                throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
            }

            const auto & first_column = arguments[0];

            /// Check 1 argument: must be of type String (JSONPath)
            if (!isString(first_column.type))
            {
                throw Exception(
                    "JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            /// Check 1 argument: must be const (JSONPath)
            if (!isColumnConst(*first_column.column))
            {
                throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            const auto & second_column = arguments[1];

            /// Check 2 argument: must be of type String (JSON)
            if (!isString(second_column.type))
            {
                throw Exception(
                    "JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            const ColumnPtr & arg_jsonpath = first_column.column;
            const auto * arg_jsonpath_const = typeid_cast<const ColumnConst *>(arg_jsonpath.get());
            const auto * arg_jsonpath_string = typeid_cast<const ColumnString *>(arg_jsonpath_const->getDataColumnPtr().get());

            /// The JSON argument may be either a full column or a constant wrapping a single string.
            const ColumnPtr & arg_json = second_column.column;
            const auto * col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
            const auto * col_json_string
                = typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());

            /// Get data and offsets for 1 argument (JSONPath)
            const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars();
            const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets();

            /// Prepare to parse 1 argument (JSONPath); the trailing terminator byte is excluded
            const char * query_begin = reinterpret_cast<const char *>(&chars_path[0]);
            const char * query_end = query_begin + offsets_path[0] - 1;

            /// Tokenize query
            Tokens tokens(query_begin, query_end);

            /// Max depth 0 indicates that depth is not limited
            IParser::Pos token_iterator(tokens, parse_depth);

            /// Parse query and create AST tree
            Expected expected;
            ASTPtr res;
            ParserJSONPath parser;
            const bool parse_res = parser.parse(token_iterator, res, expected);
            if (!parse_res)
            {
                throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS};
            }

            /// Get data and offsets for 2 argument (JSON)
            const ColumnString::Chars & chars_json = col_json_string->getChars();
            const ColumnString::Offsets & offsets_json = col_json_string->getOffsets();

            JSONParser json_parser;
            using Element = typename JSONParser::Element;
            Element document;
            bool document_ok = false;

            /// Parse JSON for every row
            Impl<JSONParser> impl;
            for (const auto i : collections::range(0, input_rows_count))
            {
                /// A constant JSON argument stores a single string, so every row must read entry 0;
                /// indexing it with the row number would run past the end of its offsets.
                size_t row = 0;
                if (!col_json_const)
                {
                    row = i;
                }

                /// NOTE(review): for row 0 this reads offsets_json[-1], exactly as before this change;
                /// presumably the offsets container is padded so that this yields 0 - confirm.
                std::string_view json{
                    reinterpret_cast<const char *>(&chars_json[offsets_json[row - 1]]), offsets_json[row] - offsets_json[row - 1] - 1};

                document_ok = json_parser.parse(json, document);

                bool added_to_column = false;
                if (document_ok)
                {
                    added_to_column = impl.insertResultToColumn(*to, document, res);
                }
                if (!added_to_column)
                {
                    to->insertDefault();
                }
            }
            return to;
        }
    };
};
/**
  * Generic SQL/JSON function: `Name` supplies the function name, `Impl` the per-function logic.
  * The first argument (the JSONPath) is declared always-constant; the function is variadic.
  */
template <typename Name, template <typename> typename Impl>
class FunctionSQLJSON : public IFunction, WithConstContext
{
public:
    static FunctionPtr create(ContextConstPtr context_) { return std::make_shared<FunctionSQLJSON>(context_); }

    /// explicit: a context pointer should never silently convert into a function object.
    explicit FunctionSQLJSON(ContextConstPtr context_) : WithConstContext(context_) { }

    static constexpr auto name = Name::name;
    String getName() const override { return Name::name; }

    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }
    bool useDefaultImplementationForConstants() const override { return true; }

    /// Argument 0 is the JSONPath.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }

    /// The result type is decided by Impl; DummyJSONParser is only used to instantiate the template here.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        return Impl<DummyJSONParser>::getReturnType(Name::name, arguments);
    }

    /**
      * Picks a JSON parser and delegates to FunctionSQLJSONHelpers::Executor.
      * SimdJSONParser is used when it is compiled in (USE_SIMDJSON) and the `allow_simdjson`
      * setting is on; otherwise DummyJSONParser is used.
      */
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        /// Choose JSONParser.
        /// 1. Lexer(path) -> Tokens
        /// 2. Create ASTPtr
        /// 3. Parser(Tokens, ASTPtr) -> complete AST
        /// 4. Execute functions: call getNextItem on generator and handle each item
        uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth;
#if USE_SIMDJSON
        if (getContext()->getSettingsRef().allow_simdjson)
            return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
#endif
        return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
    }
};
/// Name tag for the JSON_EXISTS function.
struct NameJSONExists
{
    static constexpr const char * name = "JSON_EXISTS";
};
/// Name tag for the JSON_VALUE function.
struct NameJSONValue
{
    static constexpr const char * name = "JSON_VALUE";
};
/// Name tag for the JSON_QUERY function.
struct NameJSONQuery
{
    static constexpr const char * name = "JSON_QUERY";
};
template <typename JSONParser>
class JSONExistsImpl
{
public:
using Element = typename JSONParser::Element;
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
{
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
Element current_element = root;
VisitorStatus status;
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
{
if (status == VisitorStatus::Ok)
{
break;
}
current_element = root;
}
/// insert result, status can be either Ok (if we found the item)
/// or Exhausted (if we never found the item)
ColumnUInt8 & col_bool = assert_cast<ColumnUInt8 &>(dest);
if (status == VisitorStatus::Ok)
{
col_bool.insert(1);
}
else
{
col_bool.insert(0);
}
return true;
}
};
/// JSON_VALUE: writes the first item matched by the JSONPath that is neither an array nor an object.
template <typename JSONParser>
class JSONValueImpl
{
public:
    using Element = typename JSONParser::Element;

    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }

    static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

    /**
      * @param dest       result column (a ColumnString)
      * @param root       root element of the parsed document
      * @param query_ptr  parsed JSONPath
      * @return true if a value was inserted, false if the path produced no scalar item
      */
    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
    {
        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
        Element current_element = root;
        VisitorStatus status;
        while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
        {
            /// The first match that is not an array or an object is the result.
            if (status == VisitorStatus::Ok && !(current_element.isArray() || current_element.isObject()))
            {
                break;
            }

            /// ON ERROR
            /// For VisitorStatus::Error it is possible to handle errors with ON ERROR
            /// (as described in ISO/IEC TR 19075-6), however this functionality is not implemented yet;
            /// such items are simply skipped.

            /// Every call to the generator starts from the document root.
            current_element = root;
        }

        if (status == VisitorStatus::Exhausted)
        {
            return false;
        }

        std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        out << current_element.getElement();
        auto output_str = out.str();
        ColumnString & col_str = assert_cast<ColumnString &>(dest);
        col_str.insertData(output_str.data(), output_str.size());
        return true;
    }
};
/**
* Function to test jsonpath member access, will be removed in final PR
* @tparam JSONParser parser
*/
template <typename JSONParser>
class JSONQueryImpl
{
public:
using Element = typename JSONParser::Element;
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
{
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
Element current_element = root;
VisitorStatus status;
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
/// Create json array of results: [res1, res2, ...]
out << "[";
bool success = false;
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
{
if (status == VisitorStatus::Ok)
{
if (success)
{
out << ", ";
}
success = true;
out << current_element.getElement();
}
else if (status == VisitorStatus::Error)
{
/// ON ERROR
/// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
/// however this functionality is not implemented yet
}
current_element = root;
}
out << "]";
if (!success)
{
return false;
}
ColumnString & col_str = assert_cast<ColumnString &>(dest);
auto output_str = out.str();
col_str.insertData(output_str.data(), output_str.size());
return true;
}
};
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h>
#include <Parsers/IAST.h>
namespace DB
{
/// Top-level AST node of a parsed JSONPath.
class ASTJSONPath : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPath"; }

    /// NOTE(review): this copies the node as-is, so the clone's jsonpath_query holds the same
    /// pointer value as the original's - confirm whether a deep copy of the query is expected.
    ASTPtr clone() const override { return std::make_shared<ASTJSONPath>(*this); }

    /// The parsed query. Initialised to nullptr so that a default-initialised node
    /// never carries an indeterminate pointer.
    ASTJSONPathQuery * jsonpath_query = nullptr;
};
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for a member access step of a JSONPath.
class ASTJSONPathMemberAccess : public IAST
{
public:
    /// Member name to lookup in json document (in path: $.some_key.another_key. ...)
    String member_name;

    String getID(char) const override
    {
        return "ASTJSONPathMemberAccess";
    }

    ASTPtr clone() const override
    {
        return std::make_shared<ASTJSONPathMemberAccess>(*this);
    }
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node holding the steps of a JSONPath as its children.
class ASTJSONPathQuery : public IAST
{
public:
    String getID(char) const override
    {
        return "ASTJSONPathQuery";
    }

    ASTPtr clone() const override
    {
        return std::make_shared<ASTJSONPathQuery>(*this);
    }
};
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <vector>
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for an array subscript step of a JSONPath.
class ASTJSONPathRange : public IAST
{
public:
    /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9])
    /// Range is represented as <start, end (non-inclusive)>
    /// Single index is represented as <start, start + 1>
    std::vector<std::pair<UInt32, UInt32>> ranges;

    bool is_star = false;

    String getID(char) const override
    {
        return "ASTJSONPathRange";
    }

    ASTPtr clone() const override
    {
        return std::make_shared<ASTJSONPathRange>(*this);
    }
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for the root step of a JSONPath.
class ASTJSONPathRoot : public IAST
{
public:
    String getID(char) const override
    {
        return "ASTJSONPathRoot";
    }

    ASTPtr clone() const override
    {
        return std::make_shared<ASTJSONPathRoot>(*this);
    }
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for the star step of a JSONPath.
class ASTJSONPathStar : public IAST
{
public:
    String getID(char) const override
    {
        return "ASTJSONPathStar";
    }

    ASTPtr clone() const override
    {
        return std::make_shared<ASTJSONPathStar>(*this);
    }
};
}

View File

@ -0,0 +1,13 @@
# Build script for the JSONPath support library (clickhouse_functions_jsonpath).
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
# Collect headers and sources from the Parsers, ASTs and Generator subdirectories.
add_headers_and_sources(clickhouse_functions_jsonpath Parsers)
add_headers_and_sources(clickhouse_functions_jsonpath ASTs)
add_headers_and_sources(clickhouse_functions_jsonpath Generator)
add_library(clickhouse_functions_jsonpath ${clickhouse_functions_jsonpath_sources} ${clickhouse_functions_jsonpath_headers})
# The library depends on dbms and on the parsers library.
target_link_libraries(clickhouse_functions_jsonpath PRIVATE dbms)
target_link_libraries(clickhouse_functions_jsonpath PRIVATE clickhouse_parsers)
# Make the JSONPath library part of clickhouse_functions.
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_jsonpath)
# Build without debug info (-g0) when STRIP_DEBUG_SYMBOLS_FUNCTIONS is set.
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions_jsonpath PRIVATE "-g0")
endif()

View File

@ -0,0 +1,128 @@
#pragma once
#include <Functions/JSONPath/Generator/IGenerator.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathRange.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathRoot.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathStar.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Walks a JSON document along a parsed JSONPath, yielding one matched item per getNextItem call.
template <typename JSONParser>
class GeneratorJSONPath : public IGenerator<JSONParser>
{
public:
    /**
     * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors.
     * Children of a type other than root / member access / range / star are skipped.
     *
     * @param query_ptr_ pointer to ASTJSONPath
     * @throws Exception LOGICAL_ERROR if the pointer is null, is not an ASTJSONPath or carries no query
     */
    explicit GeneratorJSONPath(ASTPtr query_ptr_)
    {
        query_ptr = query_ptr_;
        const auto * path = query_ptr ? query_ptr->as<ASTJSONPath>() : nullptr;
        if (!path)
        {
            throw Exception("Invalid path", ErrorCodes::LOGICAL_ERROR);
        }
        const auto * query = path->jsonpath_query;
        if (!query)
        {
            throw Exception("Invalid path", ErrorCodes::LOGICAL_ERROR);
        }

        /// Iterate by reference: no need to copy the shared pointer of every child.
        for (const auto & child_ast : query->children)
        {
            if (typeid_cast<ASTJSONPathRoot *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathRoot<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathMemberAccess *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathMemberAccess<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathRange *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathRange<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathStar *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathStar<JSONParser>>(child_ast));
            }
        }
    }

    const char * getName() const override { return "GeneratorJSONPath"; }

    /**
     * This method exposes API of traversing all paths, described by JSONPath,
     * to SQLJSON Functions.
     * Expected usage is to iteratively call this method from inside the function
     * and to execute custom logic with received element or handle an error.
     * On each such call getNextItem will yield next item into element argument
     * and modify its internal state to prepare for next call.
     *
     * @param element root of JSON document on entry; overwritten with the yielded item
     *                whenever the returned status is not Exhausted
     * @return VisitorStatus::Exhausted when no items are left, otherwise the status of the
     *         last visitor that ran (Ok or Error); Ignore is never returned, such items are skipped
     */
    VisitorStatus getNextItem(typename JSONParser::Element & element) override
    {
        while (true)
        {
            /// element passed to us actually is root, so here we assign current to root
            auto current = element;
            if (current_visitor < 0)
            {
                return VisitorStatus::Exhausted;
            }

            /// Replay the visitors before the current one without changing their state.
            for (int i = 0; i < current_visitor; ++i)
            {
                visitors[i]->apply(current);
            }

            /// Run the remaining visitors, stopping at the first one that cannot proceed.
            VisitorStatus status = VisitorStatus::Error;
            for (size_t i = current_visitor; i < visitors.size(); ++i)
            {
                status = visitors[i]->visit(current);
                current_visitor = static_cast<int>(i);
                if (status == VisitorStatus::Error || status == VisitorStatus::Ignore)
                {
                    break;
                }
            }
            updateVisitorsForNextRun();

            if (status != VisitorStatus::Ignore)
            {
                element = current;
                return status;
            }
        }
    }

private:
    /**
     * Rewinds past exhausted visitors (reinitializing each of them) and advances the state
     * of the first non-exhausted one.
     *
     * @return false when every visitor is exhausted, i.e. the generator is done
     */
    bool updateVisitorsForNextRun()
    {
        while (current_visitor >= 0 && visitors[current_visitor]->isExhausted())
        {
            visitors[current_visitor]->reinitialize();
            current_visitor--;
        }
        if (current_visitor >= 0)
        {
            visitors[current_visitor]->updateState();
        }
        return current_visitor >= 0;
    }

    /// Index of the visitor to resume from; a negative value means the generator is exhausted.
    int current_visitor = 0;
    ASTPtr query_ptr;
    VisitorList<JSONParser> visitors;
};
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <Functions/JSONPath/Generator/IGenerator_fwd.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
#include <Parsers/IAST.h>
namespace DB
{
/// Interface of an object that yields, one call at a time, the elements described by a JSONPath query.
template <typename JSONParser>
class IGenerator
{
public:
    IGenerator() = default;

    virtual ~IGenerator() = default;

    virtual const char * getName() const = 0;

    /**
     * Yields the next element described by the JSONPath query.
     *
     * @param element element to be extracted into
     * @return status of this step as a VisitorStatus value
     */
    virtual VisitorStatus getNextItem(typename JSONParser::Element & element) = 0;
};
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Functions/JSONPath/Generator/IVisitor.h>
namespace DB
{
/// Forward declaration of the generator interface.
template <typename JSONParser>
class IGenerator;
/// Shared pointer to a visitor for the given parser.
template <typename JSONParser>
using IVisitorPtr = std::shared_ptr<IVisitor<JSONParser>>;
/// Sequence of visitors for the given parser.
template <typename JSONParser>
using VisitorList = std::vector<IVisitorPtr<JSONParser>>;
}

View File

@ -0,0 +1,46 @@
#pragma once
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Interface of a single JSONPath step that can be applied to a JSON element.
template <typename JSONParser>
class IVisitor
{
public:
    virtual const char * getName() const = 0;

    /**
     * Applies this visitor to document and mutates its state
     * @param element element to step from; replaced by the element this step selects
     * @return status of the step
     */
    virtual VisitorStatus visit(typename JSONParser::Element & element) = 0;

    /**
     * Applies this visitor to document, but does not mutate state
     * @param element element to step from; replaced by the element this step selects
     * @return status of the step
     */
    virtual VisitorStatus apply(typename JSONParser::Element & element) const = 0;

    /**
     * Restores visitor's initial state for later use
     */
    virtual void reinitialize() = 0;

    /// Advances the visitor's internal state in preparation for its next visit.
    virtual void updateState() = 0;

    /// Read-only query, hence const-qualified.
    bool isExhausted() const { return is_exhausted; }

    void setExhausted(bool exhausted) { is_exhausted = exhausted; }

    virtual ~IVisitor() = default;

private:
    /**
     * This variable is for detecting whether a visitor's next visit will be able
     * to yield a new item.
     */
    bool is_exhausted = false;
};
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for a member access step: selects the member named in the AST node from a JSON object.
template <typename JSONParser>
class VisitorJSONPathMemberAccess : public IVisitor<JSONParser>
{
public:
    /// @param member_access_ptr_ AST node of type ASTJSONPathMemberAccess
    explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_)
        : member_access_ptr(member_access_ptr_->as<ASTJSONPathMemberAccess>()) { }

    const char * getName() const override { return "VisitorJSONPathMemberAccess"; }

    /**
     * Replaces element with the member looked up in it.
     * The outcome of the lookup is not checked here: element receives whatever `find` left in the result.
     * @return always VisitorStatus::Ok
     */
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Element result;
        element.getObject().find(std::string_view(member_access_ptr->member_name), result);
        element = result;
        return VisitorStatus::Ok;
    }

    /**
     * Marks the visitor exhausted (a member access yields at most one item) and steps into the member.
     * @return VisitorStatus::Error if element is not an object or has no such member, Ok otherwise
     */
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        this->setExhausted(true);
        if (!element.isObject())
        {
            return VisitorStatus::Error;
        }
        typename JSONParser::Element result;
        if (!element.getObject().find(std::string_view(member_access_ptr->member_name), result))
        {
            return VisitorStatus::Error;
        }
        /// The lookup above already produced the member, so use it directly instead of searching again.
        element = result;
        return VisitorStatus::Ok;
    }

    void reinitialize() override { this->setExhausted(false); }

    void updateState() override { }

private:
    ASTJSONPathMemberAccess * member_access_ptr;
};
}

View File

@ -0,0 +1,80 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for an array subscript step: walks the indices listed in the ranges of an ASTJSONPathRange.
template <typename JSONParser>
class VisitorJSONPathRange : public IVisitor<JSONParser>
{
public:
    /// @param range_ptr_ AST node of type ASTJSONPathRange; its `ranges` must not be empty,
    ///                   since the first range is read here
    explicit VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as<ASTJSONPathRange>())
    {
        current_range = 0;
        current_index = range_ptr->ranges[current_range].first;
    }

    const char * getName() const override { return "VisitorJSONPathRange"; }

    /**
     * Replaces element with the array item at the current index. No bounds check is made here.
     * @return always VisitorStatus::Ok
     */
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Array array = element.getArray();
        element = array[current_index];
        return VisitorStatus::Ok;
    }

    /**
     * Steps into the array item at the current index.
     * @return Error if element is not an array (the visitor is then exhausted),
     *         Ignore if the current index is beyond the end of the array, Ok otherwise
     */
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        if (!element.isArray())
        {
            this->setExhausted(true);
            return VisitorStatus::Error;
        }

        VisitorStatus status;
        if (current_index < element.getArray().size())
        {
            apply(element);
            status = VisitorStatus::Ok;
        }
        else
        {
            status = VisitorStatus::Ignore;
        }

        /// Exhausted once the last index of the last range has been visited.
        if (current_index + 1 == range_ptr->ranges[current_range].second
            && current_range + 1 == range_ptr->ranges.size())
        {
            this->setExhausted(true);
        }

        return status;
    }

    void reinitialize() override
    {
        current_range = 0;
        current_index = range_ptr->ranges[current_range].first;
        this->setExhausted(false);
    }

    /// Moves to the next index, switching to the start of the next range when the current one ends.
    /// NOTE(review): reads ranges[current_range] after the increment without a bounds check; this
    /// presumably relies on never being called once the visitor is exhausted - confirm against the caller.
    void updateState() override
    {
        current_index++;
        if (current_index == range_ptr->ranges[current_range].second)
        {
            current_range++;
            current_index = range_ptr->ranges[current_range].first;
        }
    }

private:
    ASTJSONPathRange * range_ptr;
    size_t current_range;
    UInt32 current_index;
};
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for the root step: leaves the element as it is.
template <typename JSONParser>
class VisitorJSONPathRoot : public IVisitor<JSONParser>
{
public:
    VisitorJSONPathRoot(ASTPtr) { }

    const char * getName() const override
    {
        return "VisitorJSONPathRoot";
    }

    /// No-op on document, since we are already passed document's root
    VisitorStatus apply(typename JSONParser::Element & /*element*/) const override
    {
        return VisitorStatus::Ok;
    }

    /// Marks the visitor exhausted; the status comes from apply, which always reports Ok.
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        this->setExhausted(true);
        return apply(element);
    }

    void reinitialize() override
    {
        this->setExhausted(false);
    }

    void updateState() override
    {
    }
};
}

View File

@ -0,0 +1,66 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for the star step: walks every item of an array in order.
template <typename JSONParser>
class VisitorJSONPathStar : public IVisitor<JSONParser>
{
public:
    explicit VisitorJSONPathStar(ASTPtr)
    {
        current_index = 0;
    }

    const char * getName() const override { return "VisitorJSONPathStar"; }

    /**
     * Replaces element with the array item at the current index. No bounds check is made here.
     * @return always VisitorStatus::Ok
     */
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Array array = element.getArray();
        element = array[current_index];
        return VisitorStatus::Ok;
    }

    /**
     * Steps into the array item at the current index.
     * @return Error if element is not an array, Ignore once the index has run past the end of the
     *         array (the visitor is exhausted in both cases), Ok otherwise
     */
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        if (!element.isArray())
        {
            this->setExhausted(true);
            return VisitorStatus::Error;
        }

        VisitorStatus status;
        if (current_index < element.getArray().size())
        {
            apply(element);
            status = VisitorStatus::Ok;
        }
        else
        {
            status = VisitorStatus::Ignore;
            this->setExhausted(true);
        }

        return status;
    }

    void reinitialize() override
    {
        current_index = 0;
        this->setExhausted(false);
    }

    void updateState() override
    {
        current_index++;
    }

private:
    UInt32 current_index;
};
}

View File

@ -0,0 +1,13 @@
#pragma once
namespace DB
{
/// Outcome of a single step of a visitor or of the generator.
enum VisitorStatus
{
    /// An item was produced.
    Ok = 0,
    /// There is nothing left to produce.
    Exhausted = 1,
    /// The step could not be applied to the element.
    Error = 2,
    /// The step produced nothing and should be skipped.
    Ignore = 3
};
}

View File

@ -0,0 +1,31 @@
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
namespace DB
{
/**
 * Entry parser for JSONPath: parses the query and wraps it in an ASTJSONPath node.
 *
 * @param pos token iterator
 * @param node receives the ASTJSONPath node (assigned whether or not the query parsed)
 * @param expected stuff for logging
 * @return was parse successful
 */
bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    auto jsonpath_node = std::make_shared<ASTJSONPath>();

    /// The query parser pushes the dot and bracket ASTs into the children of the query node
    ASTPtr query_node;
    ParserJSONPathQuery query_parser;
    const bool parsed = query_parser.parse(pos, query_node, expected);

    /// Attach the query to the ASTJSONPath node only when it was parsed successfully
    if (parsed)
    {
        jsonpath_node->set(jsonpath_node->jsonpath_query, query_node);
    }

    node = jsonpath_node;
    return parsed;
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/**
 * Entry parser for JSONPath
 */
class ParserJSONPath : public IParserBase
{
public:
    explicit ParserJSONPath() = default;

private:
    const char * getName() const override
    {
        return "ParserJSONPath";
    }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,42 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/Lexer.h>
namespace DB
{
/**
 * Parses a member access: a dot followed by a bare-word identifier.
 *
 * @param pos token iterator
 * @param node node of ASTJSONPathMemberAccess
 * @param expected stuff for logging
 * @return was parse successful
 */
bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    /// Leading dot
    const bool starts_with_dot = (pos->type == TokenType::Dot);
    if (!starts_with_dot)
    {
        return false;
    }
    ++pos;

    /// Only a bare word is accepted as the member name
    const bool name_is_bare_word = (pos->type == TokenType::BareWord);
    if (!name_is_bare_word)
    {
        return false;
    }

    ASTPtr identifier;
    ParserIdentifier identifier_parser;
    const bool identifier_parsed = identifier_parser.parse(pos, identifier, expected);
    if (!identifier_parsed)
    {
        return false;
    }

    auto access_node = std::make_shared<ASTJSONPathMemberAccess>();
    node = access_node;
    return tryGetIdentifierNameInto(identifier, access_node->member_name);
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser of a JSONPath member access step.
class ParserJSONPathMemberAccess : public IParserBase
{
private:
    const char * getName() const override
    {
        return "ParserJSONPathMemberAccess";
    }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,48 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h>
namespace DB
{
/**
 * Parses a whole JSONPath query: the root followed by any number of
 * member access, range and star steps, up to the end of the token stream.
 *
 * @param pos token iterator
 * @param query node of ASTJSONPathQuery
 * @param expected stuff for logging
 * @return was parse successful
 */
bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected)
{
    query = std::make_shared<ASTJSONPathQuery>();

    ParserJSONPathMemberAccess member_access_parser;
    ParserJSONPathRange range_parser;
    ParserJSONPathStar star_parser;
    ParserJSONPathRoot root_parser;

    /// The query has to begin with the root
    ASTPtr root_node;
    const bool root_parsed = root_parser.parse(pos, root_node, expected);
    if (!root_parsed)
    {
        return false;
    }
    query->children.push_back(root_node);

    /// Tries the step parsers in a fixed order: member access, then range, then star
    const auto parse_next_step = [&](ASTPtr & step)
    {
        return member_access_parser.parse(pos, step, expected)
            || range_parser.parse(pos, step, expected)
            || star_parser.parse(pos, step, expected);
    };

    for (ASTPtr step; parse_next_step(step); step = nullptr)
    {
        if (step)
        {
            query->children.push_back(step);
        }
    }

    /// parsing was successful if we reached the end of query by this point
    return pos->type == TokenType::EndOfStream;
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser of a JSONPath query.
class ParserJSONPathQuery : public IParserBase
{
protected:
    const char * getName() const override
    {
        return "ParserJSONPathQuery";
    }

    bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override;
};
}

View File

@ -0,0 +1,94 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/CommonParsers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/**
 * Parses an array subscript: '[' followed by comma-separated single indices
 * and "start TO end" ranges, closed by ']'.
 *
 * @param pos token iterator
 * @param node node of ASTJSONPathRange
 * @param expected stuff for logging
 * @return was parse successful
 * @throws Exception BAD_ARGUMENTS if the start of a range is not less than its end
 */
bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type != TokenType::OpeningSquareBracket)
    {
        return false;
    }
    ++pos;

    auto range = std::make_shared<ASTJSONPathRange>();
    node = range;

    ParserNumber number_p;
    ASTPtr number_ptr;
    while (pos->type != TokenType::ClosingSquareBracket)
    {
        if (pos->type != TokenType::Number)
        {
            return false;
        }

        std::pair<UInt32, UInt32> range_indices;
        if (!number_p.parse(pos, number_ptr, expected))
        {
            return false;
        }
        /// NOTE(review): the literal is read as UInt32 without checking what kind of number was parsed - confirm.
        range_indices.first = number_ptr->as<ASTLiteral>()->value.get<UInt32>();

        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket)
        {
            /// Single index case
            range_indices.second = range_indices.first + 1;
        }
        else if (pos->type == TokenType::BareWord)
        {
            if (!ParserKeyword("TO").ignore(pos, expected))
            {
                return false;
            }
            if (!number_p.parse(pos, number_ptr, expected))
            {
                return false;
            }
            range_indices.second = number_ptr->as<ASTLiteral>()->value.get<UInt32>();
        }
        else
        {
            return false;
        }

        /// The end is non-inclusive, so a valid range needs start < end.
        if (range_indices.first >= range_indices.second)
        {
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Start of range must be less than end of range, however {} >= {}",
                range_indices.first,
                range_indices.second);
        }

        range->ranges.push_back(range_indices);

        /// Only a comma may separate entries. Previously any token found here was skipped,
        /// so input such as [1 to 3 4] was accepted with the stray token silently dropped.
        if (pos->type == TokenType::Comma)
        {
            ++pos;
        }
        else if (pos->type != TokenType::ClosingSquareBracket)
        {
            return false;
        }
    }
    ++pos;

    /// We can't have both ranges and star present, so parse was successful <=> exactly 1 of these conditions is true
    return !range->ranges.empty() ^ range->is_star;
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser of a JSONPath array subscript step.
class ParserJSONPathRange : public IParserBase
{
public:
    explicit ParserJSONPathRange() = default;

private:
    const char * getName() const override
    {
        return "ParserJSONPathRange";
    }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,27 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
#include <Parsers/Lexer.h>
namespace DB
{
/**
 * Parses the root of a JSONPath: a single dollar sign.
 *
 * @param pos token iterator
 * @param node node of ASTJSONPathRoot
 * @param expected stuff for logging
 * @return was parse successful
 */
bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type == TokenType::DollarSign)
    {
        node = std::make_shared<ASTJSONPathRoot>();
        ++pos;
        return true;
    }

    /// Anything else: record what was expected at this position and fail
    expected.add(pos, "dollar sign (start of jsonpath)");
    return false;
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser of the JSONPath root step.
class ParserJSONPathRoot : public IParserBase
{
public:
    explicit ParserJSONPathRoot() = default;

private:
    const char * getName() const override
    {
        return "ParserJSONPathRoot";
    }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,31 @@
#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h>
#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h>
namespace DB
{
/**
 * Parses the wildcard accessor of a JSONPath expression: the token sequence
 * opening square bracket, asterisk, closing square bracket.
 *
 * @param pos token iterator; moved past the closing bracket on success.
 *            On failure it is NOT rewound here and stays after the tokens already consumed.
 * @param node receives a freshly created ASTJSONPathStar on success
 * @param expected receives a description of the expected token when the closing bracket is missing
 * @return true if the whole sequence was matched, false otherwise
 */
bool ParserJSONPathStar::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    /// Steps over the current token only if it has the wanted type.
    const auto skip_if = [&pos](TokenType wanted)
    {
        if (pos->type != wanted)
            return false;
        ++pos;
        return true;
    };

    /// The "[*" prefix is matched silently: a mismatch adds nothing to `expected`.
    if (!skip_if(TokenType::OpeningSquareBracket) || !skip_if(TokenType::Asterisk))
        return false;

    if (pos->type != TokenType::ClosingSquareBracket)
    {
        expected.add(pos, "Closing square bracket");
        return false;
    }

    ++pos;
    node = std::make_shared<ASTJSONPathStar>();
    return true;
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser for the wildcard accessor of a JSONPath expression.
/// The grammar itself lives in parseImpl (ParserJSONPathStar.cpp).
class ParserJSONPathStar : public IParserBase
{
public:
    explicit ParserJSONPathStar() = default;

private:
    /// Does the actual parsing; returns whether the parse was successful.
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;

    /// Name reported for this parser.
    const char * getName() const override { return "ParserJSONPathStar"; }
};
}

View File

@ -50,6 +50,8 @@ struct SimdJSONParser
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
ALWAYS_INLINE simdjson::dom::element getElement() const { return element; }
private:
simdjson::dom::element element;
};

View File

@ -40,6 +40,7 @@ void registerFunctionsGeo(FunctionFactory &);
void registerFunctionsIntrospection(FunctionFactory &);
void registerFunctionsNull(FunctionFactory &);
void registerFunctionsJSON(FunctionFactory &);
void registerFunctionsSQLJSON(FunctionFactory &);
void registerFunctionToJSONString(FunctionFactory &);
void registerFunctionsConsistentHashing(FunctionFactory & factory);
void registerFunctionsUnixTimestamp64(FunctionFactory & factory);
@ -99,6 +100,7 @@ void registerFunctions()
registerFunctionsGeo(factory);
registerFunctionsNull(factory);
registerFunctionsJSON(factory);
registerFunctionsSQLJSON(factory);
registerFunctionToJSONString(factory);
registerFunctionsIntrospection(factory);
registerFunctionsConsistentHashing(factory);

View File

@ -44,6 +44,7 @@ SRCS(
FunctionFile.cpp
FunctionHelpers.cpp
FunctionJoinGet.cpp
FunctionSQLJSON.cpp
FunctionsAES.cpp
FunctionsCoding.cpp
FunctionsConversion.cpp
@ -76,6 +77,12 @@ SRCS(
GatherUtils/sliceFromRightConstantOffsetUnbounded.cpp
GeoHash.cpp
IFunction.cpp
JSONPath/Parsers/ParserJSONPath.cpp
JSONPath/Parsers/ParserJSONPathMemberAccess.cpp
JSONPath/Parsers/ParserJSONPathQuery.cpp
JSONPath/Parsers/ParserJSONPathRange.cpp
JSONPath/Parsers/ParserJSONPathRoot.cpp
JSONPath/Parsers/ParserJSONPathStar.cpp
TargetSpecific.cpp
URL/URLHierarchy.cpp
URL/URLPathHierarchy.cpp

View File

@ -338,6 +338,11 @@ Token Lexer::nextTokenImpl()
}
default:
if (*pos == '$' && ((pos + 1 < end && !isWordCharASCII(pos[1])) || pos + 1 == end))
{
/// Capture standalone dollar sign
return Token(TokenType::DollarSign, token_begin, ++pos);
}
if (isWordCharASCII(*pos) || *pos == '$')
{
++pos;

View File

@ -33,6 +33,7 @@ namespace DB
\
M(Asterisk) /** Could be used as multiplication operator or on its own: "SELECT *" */ \
\
M(DollarSign) \
M(Plus) \
M(Minus) \
M(Slash) \

View File

@ -0,0 +1,43 @@
--JSON_VALUE--
1
1.2
true
"world"
null
--JSON_QUERY--
[{"hello":1}]
[1]
[1.2]
[true]
["world"]
[null]
[["world","world2"]]
[{"world":"!"}]
[0, 1, 4, 0, -1, -4]
--JSON_EXISTS--
1
0
1
1
1
0
1
0
0
1
1
0
1
0
1
--MANY ROWS--
0 ["Vasily", "Kostya"]
1 ["Tihon", "Ernest"]
2 ["Katya", "Anatoliy"]

View File

@ -0,0 +1,50 @@
-- Functional test for the SQL/JSON functions JSON_VALUE, JSON_QUERY and JSON_EXISTS.
-- In every call the first argument is the JSONPath and the second one is the JSON document.
-- The SELECT '--...--' statements only print section markers into the output.
SELECT '--JSON_VALUE--';
SELECT JSON_VALUE('$', '{"hello":1}'); -- root is a complex object => default value (empty string)
SELECT JSON_VALUE('$.hello', '{"hello":1}');
SELECT JSON_VALUE('$.hello', '{"hello":1.2}');
SELECT JSON_VALUE('$.hello', '{"hello":true}');
SELECT JSON_VALUE('$.hello', '{"hello":"world"}');
SELECT JSON_VALUE('$.hello', '{"hello":null}');
-- NOTE(review): the next two paths point at an array / an object; presumably they yield the
-- default value like the root-object case above -- confirm against the reference file.
SELECT JSON_VALUE('$.hello', '{"hello":["world","world2"]}');
SELECT JSON_VALUE('$.hello', '{"hello":{"world":"!"}}');
SELECT JSON_VALUE('$.hello', '{hello:world}'); -- invalid json => default value (empty string)
-- Empty document.
SELECT JSON_VALUE('$.hello', '');
SELECT '--JSON_QUERY--';
SELECT JSON_QUERY('$', '{"hello":1}');
SELECT JSON_QUERY('$.hello', '{"hello":1}');
SELECT JSON_QUERY('$.hello', '{"hello":1.2}');
SELECT JSON_QUERY('$.hello', '{"hello":true}');
SELECT JSON_QUERY('$.hello', '{"hello":"world"}');
SELECT JSON_QUERY('$.hello', '{"hello":null}');
SELECT JSON_QUERY('$.hello', '{"hello":["world","world2"]}');
SELECT JSON_QUERY('$.hello', '{"hello":{"world":"!"}}');
SELECT JSON_QUERY('$.hello', '{hello:{"world":"!"}}}'); -- invalid json => default value (empty string)
-- Empty document.
SELECT JSON_QUERY('$.hello', '');
-- [*] visits both inner arrays; [0 to 2, 4] picks indices 0, 1 and 4 of each one
-- (expected output is [0, 1, 4, 0, -1, -4], so the end of a range is exclusive).
SELECT JSON_QUERY('$.array[*][0 to 2, 4]', '{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}');
-- JSON_EXISTS prints 1 when the path matches something in the document and 0 otherwise.
SELECT '--JSON_EXISTS--';
SELECT JSON_EXISTS('$', '{"hello":1}');
SELECT JSON_EXISTS('$', '');
SELECT JSON_EXISTS('$', '{}');
SELECT JSON_EXISTS('$.hello', '{"hello":1}');
SELECT JSON_EXISTS('$.world', '{"hello":1,"world":2}');
-- "world" exists only as a nested key here, so the top-level lookup is expected to print 0.
SELECT JSON_EXISTS('$.world', '{"hello":{"world":1}}');
SELECT JSON_EXISTS('$.hello.world', '{"hello":{"world":1}}');
SELECT JSON_EXISTS('$.hello', '{hello:world}'); -- invalid json => default value (zero integer)
SELECT JSON_EXISTS('$.hello', '');
-- Array accessors: wildcard, an existing index and an out-of-bounds index.
SELECT JSON_EXISTS('$.hello[*]', '{"hello":["world"]}');
SELECT JSON_EXISTS('$.hello[0]', '{"hello":["world"]}');
SELECT JSON_EXISTS('$.hello[1]', '{"hello":["world"]}');
-- Wildcard followed by a member access / a nested index.
SELECT JSON_EXISTS('$.a[*].b', '{"a":[{"b":1},{"c":2}]}');
SELECT JSON_EXISTS('$.a[*].f', '{"a":[{"b":1},{"c":2}]}');
SELECT JSON_EXISTS('$.a[*][0].h', '{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}');
-- The same function applied to a non-constant String column, one JSON document per row.
SELECT '--MANY ROWS--';
DROP TABLE IF EXISTS 01889_sql_json;
CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY id;
INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}');
INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}');
INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}');
-- Expected: the first two friends of every row, ordered by id.
SELECT id, JSON_QUERY('$.friends[0 to 2]', json) FROM 01889_sql_json ORDER BY id;
DROP TABLE 01889_sql_json;