diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 271deb0f42c..b20954c9652 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -116,6 +116,8 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url) add_subdirectory(array) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array) +add_subdirectory(JSONPath) + if (USE_STATS) target_link_libraries(clickhouse_functions PRIVATE stats) endif() diff --git a/src/Functions/DummyJSONParser.h b/src/Functions/DummyJSONParser.h index a71c90e4a19..01fdab1abb6 100644 --- a/src/Functions/DummyJSONParser.h +++ b/src/Functions/DummyJSONParser.h @@ -39,6 +39,8 @@ struct DummyJSONParser std::string_view getString() const { return {}; } Array getArray() const { return {}; } Object getObject() const { return {}; } + + Element getElement() { return {}; } }; /// References an array in a JSON document. @@ -97,4 +99,9 @@ struct DummyJSONParser #endif }; +inline ALWAYS_INLINE std::ostream& operator<<(std::ostream& out, DummyJSONParser::Element) +{ + return out; +} + } diff --git a/src/Functions/FunctionSQLJSON.cpp b/src/Functions/FunctionSQLJSON.cpp new file mode 100644 index 00000000000..a316d9de7ab --- /dev/null +++ b/src/Functions/FunctionSQLJSON.cpp @@ -0,0 +1,15 @@ +#include +#include + + +namespace DB +{ + +void registerFunctionsSQLJSON(FunctionFactory & factory) +{ + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); +} + +} diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h new file mode 100644 index 00000000000..9e469c4ebac --- /dev/null +++ b/src/Functions/FunctionSQLJSON.h @@ -0,0 +1,334 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +extern const int BAD_ARGUMENTS; +} + +class FunctionSQLJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.size() < 2) + { + throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; + } + + const auto & first_column = arguments[0]; + + /// Check 1 argument: must be of type String (JSONPath) + if (!isString(first_column.type)) + { + throw Exception( + "JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + /// Check 1 argument: must be const (JSONPath) + if (!isColumnConst(*first_column.column)) + { + throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const auto & second_column = arguments[1]; + + /// Check 2 argument: must be of type String (JSON) + if (!isString(second_column.type)) + { + throw Exception( + "JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const ColumnPtr & arg_jsonpath = first_column.column; + const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); + const auto * arg_jsonpath_string = typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); + + const ColumnPtr & arg_json = second_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + /// Get data and offsets for 1 argument (JSONPath) + const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars(); + const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets(); + + /// Prepare to parse 1 argument (JSONPath) + const char * query_begin = reinterpret_cast(&chars_path[0]); + const char * query_end = query_begin + offsets_path[0] - 1; + + /// Tokenize query + Tokens tokens(query_begin, query_end); + /// Max depth 0 indicates that depth is not limited + IParser::Pos token_iterator(tokens, parse_depth); + + /// Parse query and create AST tree + Expected expected; + ASTPtr res; + ParserJSONPath parser; + const bool parse_res = parser.parse(token_iterator, res, expected); + if (!parse_res) + { + throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS}; + } + + /// Get data and offsets for 2 argument (JSON) + const ColumnString::Chars & chars_json = col_json_string->getChars(); + const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); + + JSONParser json_parser; + using Element = typename JSONParser::Element; + Element document; + bool document_ok = false; + + /// Parse JSON for every row + Impl impl; + + for (const auto i : collections::range(0, input_rows_count)) + { + std::string_view json{ + reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + document_ok = json_parser.parse(json, document); + + bool added_to_column = false; + if (document_ok) + { + added_to_column = impl.insertResultToColumn(*to, document, res); + } + if (!added_to_column) + { + to->insertDefault(); + } + } + return to; + } + }; +}; + +template typename Impl> +class FunctionSQLJSON : public IFunction, WithConstContext +{ +public: + static FunctionPtr create(ContextConstPtr context_) { return std::make_shared(context_); } + FunctionSQLJSON(ContextConstPtr context_) : WithConstContext(context_) { } + + static constexpr auto name = Name::name; + String getName() const override { return Name::name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return Impl::getReturnType(Name::name, arguments); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + /// Choose JSONParser. + /// 1. Lexer(path) -> Tokens + /// 2. Create ASTPtr + /// 3. Parser(Tokens, ASTPtr) -> complete AST + /// 4. Execute functions: call getNextItem on generator and handle each item + uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth; +#if USE_SIMDJSON + if (getContext()->getSettingsRef().allow_simdjson) + return FunctionSQLJSONHelpers::Executor::run(arguments, result_type, input_rows_count, parse_depth); +#endif + return FunctionSQLJSONHelpers::Executor::run(arguments, result_type, input_rows_count, parse_depth); + } +}; + +struct NameJSONExists +{ + static constexpr auto name{"JSON_EXISTS"}; +}; + +struct NameJSONValue +{ + static constexpr auto name{"JSON_VALUE"}; +}; + +struct NameJSONQuery +{ + static constexpr auto name{"JSON_QUERY"}; +}; + +template +class JSONExistsImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + { + GeneratorJSONPath generator_json_path(query_ptr); + Element current_element = root; + VisitorStatus status; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + current_element = root; + } + + /// insert result, status can be either Ok (if we found the item) + /// or Exhausted (if we never found the item) + ColumnUInt8 & col_bool = assert_cast(dest); + if (status == VisitorStatus::Ok) + { + col_bool.insert(1); + } + else + { + col_bool.insert(0); + } + return true; + } +}; + +template +class JSONValueImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + { + GeneratorJSONPath generator_json_path(query_ptr); + Element current_element = root; + VisitorStatus status; + Element res; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + if (!(current_element.isArray() || current_element.isObject())) + { + break; + } + } + else if (status == VisitorStatus::Error) + { + /// ON ERROR + /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), + /// however this functionality is not implemented yet + } + current_element = root; + } + + if (status == VisitorStatus::Exhausted) + { + return false; + } + + std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + out << current_element.getElement(); + auto output_str = out.str(); + ColumnString & col_str = assert_cast(dest); + col_str.insertData(output_str.data(), output_str.size()); + return true; + } +}; + +/** + * Function to test jsonpath member access, will be removed in final PR + * @tparam JSONParser parser + */ +template +class JSONQueryImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + { + GeneratorJSONPath generator_json_path(query_ptr); + Element current_element = root; + VisitorStatus status; + std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + /// Create json array of results: [res1, res2, ...] + out << "["; + bool success = false; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + if (success) + { + out << ", "; + } + success = true; + out << current_element.getElement(); + } + else if (status == VisitorStatus::Error) + { + /// ON ERROR + /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), + /// however this functionality is not implemented yet + } + current_element = root; + } + out << "]"; + if (!success) + { + return false; + } + ColumnString & col_str = assert_cast(dest); + auto output_str = out.str(); + col_str.insertData(output_str.data(), output_str.size()); + return true; + } +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPath.h b/src/Functions/JSONPath/ASTs/ASTJSONPath.h new file mode 100644 index 00000000000..dfc117db846 --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPath.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ASTJSONPath : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPath"; } + + ASTPtr clone() const override { return std::make_shared(*this); } + + ASTJSONPathQuery * jsonpath_query; +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h new file mode 100644 index 00000000000..2c9482b665e --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace DB +{ +class ASTJSONPathMemberAccess : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathMemberAccess"; } + + ASTPtr clone() const override { return std::make_shared(*this); } + +public: + /// Member name to lookup in json document (in path: $.some_key.another_key. ...) + String member_name; +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h b/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h new file mode 100644 index 00000000000..ed2992777b2 --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ +class ASTJSONPathQuery : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathQuery"; } + + ASTPtr clone() const override { return std::make_shared(*this); } +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h new file mode 100644 index 00000000000..746c6211f29 --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ASTJSONPathRange : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathRange"; } + + ASTPtr clone() const override { return std::make_shared(*this); } + +public: + /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9]) + /// Range is represented as + /// Single index is represented as + std::vector> ranges; + bool is_star = false; +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h b/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h new file mode 100644 index 00000000000..1c6469c5b75 --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ +class ASTJSONPathRoot : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathRoot"; } + + ASTPtr clone() const override { return std::make_shared(*this); } +}; + +} diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h b/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h new file mode 100644 index 00000000000..2aada47c459 --- /dev/null +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ +class ASTJSONPathStar : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathStar"; } + + ASTPtr clone() const override { return std::make_shared(*this); } +}; + +} diff --git a/src/Functions/JSONPath/CMakeLists.txt b/src/Functions/JSONPath/CMakeLists.txt new file mode 100644 index 00000000000..2531a0f84e6 --- /dev/null +++ b/src/Functions/JSONPath/CMakeLists.txt @@ -0,0 +1,13 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") +add_headers_and_sources(clickhouse_functions_jsonpath Parsers) +add_headers_and_sources(clickhouse_functions_jsonpath ASTs) +add_headers_and_sources(clickhouse_functions_jsonpath Generator) +add_library(clickhouse_functions_jsonpath ${clickhouse_functions_jsonpath_sources} ${clickhouse_functions_jsonpath_headers}) + +target_link_libraries(clickhouse_functions_jsonpath PRIVATE dbms) +target_link_libraries(clickhouse_functions_jsonpath PRIVATE clickhouse_parsers) +target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_jsonpath) + +if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) + target_compile_options(clickhouse_functions_jsonpath PRIVATE "-g0") +endif() diff --git a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h new file mode 100644 index 00000000000..291150f6df4 --- /dev/null +++ b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h @@ -0,0 +1,128 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template +class GeneratorJSONPath : public IGenerator +{ +public: + /** + * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors + * @param query_ptr_ pointer to ASTJSONPathQuery + */ + GeneratorJSONPath(ASTPtr query_ptr_) + { + query_ptr = query_ptr_; + const auto * path = query_ptr->as(); + if (!path) + { + throw Exception("Invalid path", ErrorCodes::LOGICAL_ERROR); + } + const auto * query = path->jsonpath_query; + + for (auto child_ast : query->children) + { + if (typeid_cast(child_ast.get())) + { + visitors.push_back(std::make_shared>(child_ast)); + } + else if (typeid_cast(child_ast.get())) + { + visitors.push_back(std::make_shared>(child_ast)); + } + else if (typeid_cast(child_ast.get())) + { + visitors.push_back(std::make_shared>(child_ast)); + } + else if (typeid_cast(child_ast.get())) + { + visitors.push_back(std::make_shared>(child_ast)); + } + } + } + + const char * getName() const override { return "GeneratorJSONPath"; } + + /** + * This method exposes API of traversing all paths, described by JSONPath, + * to SQLJSON Functions. + * Expected usage is to iteratively call this method from inside the function + * and to execute custom logic with received element or handle an error. + * On each such call getNextItem will yield next item into element argument + * and modify its internal state to prepare for next call. + * + * @param element root of JSON document + * @return is the generator exhausted + */ + VisitorStatus getNextItem(typename JSONParser::Element & element) override + { + while (true) + { + /// element passed to us actually is root, so here we assign current to root + auto current = element; + if (current_visitor < 0) + { + return VisitorStatus::Exhausted; + } + + for (int i = 0; i < current_visitor; ++i) + { + visitors[i]->apply(current); + } + + VisitorStatus status = VisitorStatus::Error; + for (size_t i = current_visitor; i < visitors.size(); ++i) + { + status = visitors[i]->visit(current); + current_visitor = i; + if (status == VisitorStatus::Error || status == VisitorStatus::Ignore) + { + break; + } + } + updateVisitorsForNextRun(); + + if (status != VisitorStatus::Ignore) + { + element = current; + return status; + } + } + } + +private: + bool updateVisitorsForNextRun() + { + while (current_visitor >= 0 && visitors[current_visitor]->isExhausted()) + { + visitors[current_visitor]->reinitialize(); + current_visitor--; + } + if (current_visitor >= 0) + { + visitors[current_visitor]->updateState(); + } + return current_visitor >= 0; + } + + int current_visitor = 0; + ASTPtr query_ptr; + VisitorList visitors; +}; + +} diff --git a/src/Functions/JSONPath/Generator/IGenerator.h b/src/Functions/JSONPath/Generator/IGenerator.h new file mode 100644 index 00000000000..323145e07e1 --- /dev/null +++ b/src/Functions/JSONPath/Generator/IGenerator.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +template +class IGenerator +{ +public: + IGenerator() = default; + + virtual const char * getName() const = 0; + + /** + * Used to yield next non-ignored element describes by JSONPath query. + * + * @param element to be extracted into + * @return true if generator is not exhausted + */ + virtual VisitorStatus getNextItem(typename JSONParser::Element & element) = 0; + + virtual ~IGenerator() = default; +}; + +} diff --git a/src/Functions/JSONPath/Generator/IGenerator_fwd.h b/src/Functions/JSONPath/Generator/IGenerator_fwd.h new file mode 100644 index 00000000000..bb5f64cd6f9 --- /dev/null +++ b/src/Functions/JSONPath/Generator/IGenerator_fwd.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ +template +class IGenerator; + +template +using IVisitorPtr = std::shared_ptr>; + +template +using VisitorList = std::vector>; + +} diff --git a/src/Functions/JSONPath/Generator/IVisitor.h b/src/Functions/JSONPath/Generator/IVisitor.h new file mode 100644 index 00000000000..1a94106a435 --- /dev/null +++ b/src/Functions/JSONPath/Generator/IVisitor.h @@ -0,0 +1,46 @@ +#pragma once + +#include + +namespace DB +{ +template +class IVisitor +{ +public: + virtual const char * getName() const = 0; + + /** + * Applies this visitor to document and mutates its state + * @param element simdjson element + */ + virtual VisitorStatus visit(typename JSONParser::Element & element) = 0; + + /** + * Applies this visitor to document, but does not mutate state + * @param element simdjson element + */ + virtual VisitorStatus apply(typename JSONParser::Element & element) const = 0; + + /** + * Restores visitor's initial state for later use + */ + virtual void reinitialize() = 0; + + virtual void updateState() = 0; + + bool isExhausted() { return is_exhausted; } + + void setExhausted(bool exhausted) { is_exhausted = exhausted; } + + virtual ~IVisitor() = default; + +private: + /** + * This variable is for detecting whether a visitor's next visit will be able + * to yield a new item. + */ + bool is_exhausted = false; +}; + +} diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h new file mode 100644 index 00000000000..5fe35e75a84 --- /dev/null +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +template +class VisitorJSONPathMemberAccess : public IVisitor +{ +public: + VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) + : member_access_ptr(member_access_ptr_->as()) { } + + const char * getName() const override { return "VisitorJSONPathMemberAccess"; } + + VisitorStatus apply(typename JSONParser::Element & element) const override + { + typename JSONParser::Element result; + element.getObject().find(std::string_view(member_access_ptr->member_name), result); + element = result; + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + this->setExhausted(true); + if (!element.isObject()) + { + return VisitorStatus::Error; + } + typename JSONParser::Element result; + if (!element.getObject().find(std::string_view(member_access_ptr->member_name), result)) + { + return VisitorStatus::Error; + } + apply(element); + return VisitorStatus::Ok; + } + + void reinitialize() override { this->setExhausted(false); } + + void updateState() override { } + +private: + ASTJSONPathMemberAccess * member_access_ptr; +}; + +} diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h new file mode 100644 index 00000000000..40d4f6ad95e --- /dev/null +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +template +class VisitorJSONPathRange : public IVisitor +{ +public: + VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as()) + { + current_range = 0; + current_index = range_ptr->ranges[current_range].first; + } + + const char * getName() const override { return "VisitorJSONPathRange"; } + + VisitorStatus apply(typename JSONParser::Element & element) const override + { + typename JSONParser::Element result; + typename JSONParser::Array array = element.getArray(); + element = array[current_index]; + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + if (!element.isArray()) + { + this->setExhausted(true); + return VisitorStatus::Error; + } + + VisitorStatus status; + if (current_index < element.getArray().size()) + { + apply(element); + status = VisitorStatus::Ok; + } + else + { + status = VisitorStatus::Ignore; + } + + if (current_index + 1 == range_ptr->ranges[current_range].second + && current_range + 1 == range_ptr->ranges.size()) + { + this->setExhausted(true); + } + + return status; + } + + void reinitialize() override + { + current_range = 0; + current_index = range_ptr->ranges[current_range].first; + this->setExhausted(false); + } + + void updateState() override + { + current_index++; + if (current_index == range_ptr->ranges[current_range].second) + { + current_range++; + current_index = range_ptr->ranges[current_range].first; + } + } + +private: + ASTJSONPathRange * range_ptr; + size_t current_range; + UInt32 current_index; +}; + +} diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h new file mode 100644 index 00000000000..5c48c12782f --- /dev/null +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +template +class VisitorJSONPathRoot : public IVisitor +{ +public: + VisitorJSONPathRoot(ASTPtr) { } + + const char * getName() const override { return "VisitorJSONPathRoot"; } + + VisitorStatus apply(typename JSONParser::Element & /*element*/) const override + { + /// No-op on document, since we are already passed document's root + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + apply(element); + this->setExhausted(true); + return VisitorStatus::Ok; + } + + void reinitialize() override { this->setExhausted(false); } + + void updateState() override { } +}; + +} diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h new file mode 100644 index 00000000000..4a54a76c199 --- /dev/null +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +template +class VisitorJSONPathStar : public IVisitor +{ +public: + VisitorJSONPathStar(ASTPtr) + { + current_index = 0; + } + + const char * getName() const override { return "VisitorJSONPathStar"; } + + VisitorStatus apply(typename JSONParser::Element & element) const override + { + typename JSONParser::Element result; + typename JSONParser::Array array = element.getArray(); + element = array[current_index]; + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + if (!element.isArray()) + { + this->setExhausted(true); + return VisitorStatus::Error; + } + + VisitorStatus status; + if (current_index < element.getArray().size()) + { + apply(element); + status = VisitorStatus::Ok; + } + else + { + status = VisitorStatus::Ignore; + this->setExhausted(true); + } + + return status; + } + + void reinitialize() override + { + current_index = 0; + this->setExhausted(false); + } + + void updateState() override + { + current_index++; + } + +private: + UInt32 current_index; +}; + +} diff --git a/src/Functions/JSONPath/Generator/VisitorStatus.h b/src/Functions/JSONPath/Generator/VisitorStatus.h new file mode 100644 index 00000000000..96b2ea72f18 --- /dev/null +++ b/src/Functions/JSONPath/Generator/VisitorStatus.h @@ -0,0 +1,13 @@ +#pragma once + +namespace DB +{ +enum VisitorStatus +{ + Ok, + Exhausted, + Error, + Ignore +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp new file mode 100644 index 00000000000..003e97af38b --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +namespace DB +{ +/** + * Entry parser for JSONPath + */ +bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto ast_jsonpath = std::make_shared(); + ParserJSONPathQuery parser_jsonpath_query; + + /// Push back dot AST and brackets AST to query->children + ASTPtr query; + + bool res = parser_jsonpath_query.parse(pos, query, expected); + + if (res) + { + /// Set ASTJSONPathQuery of ASTJSONPath + ast_jsonpath->set(ast_jsonpath->jsonpath_query, query); + } + + node = ast_jsonpath; + return res; +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPath.h b/src/Functions/JSONPath/Parsers/ParserJSONPath.h new file mode 100644 index 00000000000..7d2c2ad642c --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPath.h @@ -0,0 +1,21 @@ +#pragma once + +#include + + +namespace DB +{ +/** + * Entry parser for JSONPath + */ +class ParserJSONPath : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPath"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPath() = default; +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp new file mode 100644 index 00000000000..c7f047eb8fb --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -0,0 +1,42 @@ +#include +#include + +#include +#include +#include + +namespace DB +{ +/** + * + * @param pos token iterator + * @param node node of ASTJSONPathMemberAccess + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::Dot) + { + return false; + } + ++pos; + + if (pos->type != TokenType::BareWord) + { + return false; + } + + ParserIdentifier name_p; + ASTPtr member_name; + if (!name_p.parse(pos, member_name, expected)) + { + return false; + } + + auto member_access = std::make_shared(); + node = member_access; + return tryGetIdentifierNameInto(member_name, member_access->member_name); +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h new file mode 100644 index 00000000000..b28bf37d5ef --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ +class ParserJSONPathMemberAccess : public IParserBase +{ + const char * getName() const override { return "ParserJSONPathMemberAccess"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp new file mode 100644 index 00000000000..c18b2ad9b31 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include + +namespace DB + +{ +/** + * + * @param pos token iterator + * @param query node of ASTJSONPathQuery + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected) +{ + query = std::make_shared(); + ParserJSONPathMemberAccess parser_jsonpath_member_access; + ParserJSONPathRange parser_jsonpath_range; + ParserJSONPathStar parser_jsonpath_star; + ParserJSONPathRoot parser_jsonpath_root; + + ASTPtr path_root; + if (!parser_jsonpath_root.parse(pos, path_root, expected)) + { + return false; + } + query->children.push_back(path_root); + + ASTPtr accessor; + while (parser_jsonpath_member_access.parse(pos, accessor, expected) + || parser_jsonpath_range.parse(pos, accessor, expected) + || parser_jsonpath_star.parse(pos, accessor, expected)) + { + if (accessor) + { + query->children.push_back(accessor); + accessor = nullptr; + } + } + /// parsing was successful if we reached the end of query by this point + return pos->type == TokenType::EndOfStream; +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h new file mode 100644 index 00000000000..fbe7321562e --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include + + +namespace DB +{ +class ParserJSONPathQuery : public IParserBase +{ +protected: + const char * getName() const override { return "ParserJSONPathQuery"; } + bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override; +}; +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp new file mode 100644 index 00000000000..bc153b9d747 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -0,0 +1,94 @@ +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} +/** + * + * @param pos token iterator + * @param node node of ASTJSONPathQuery + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + + if (pos->type != TokenType::OpeningSquareBracket) + { + return false; + } + ++pos; + + auto range = std::make_shared(); + node = range; + + ParserNumber number_p; + ASTPtr number_ptr; + while (pos->type != TokenType::ClosingSquareBracket) + { + if (pos->type != TokenType::Number) + { + return false; + } + + std::pair range_indices; + if (!number_p.parse(pos, number_ptr, expected)) + { + return false; + } + range_indices.first = number_ptr->as()->value.get(); + + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket) + { + /// Single index case + range_indices.second = range_indices.first + 1; + } + else if (pos->type == TokenType::BareWord) + { + if (!ParserKeyword("TO").ignore(pos, expected)) + { + return false; + } + if (!number_p.parse(pos, number_ptr, expected)) + { + return false; + } + range_indices.second = number_ptr->as()->value.get(); + } + else + { + return false; + } + + if (range_indices.first >= range_indices.second) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Start of range must be greater than end of range, however {} >= {}", + range_indices.first, + range_indices.second); + } + + range->ranges.push_back(std::move(range_indices)); + if (pos->type != TokenType::ClosingSquareBracket) + { + ++pos; + } + } + ++pos; + + /// We can't have both ranges and star present, so parse was successful <=> exactly 1 of these conditions is true + return !range->ranges.empty() ^ range->is_star; +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h new file mode 100644 index 00000000000..94db29577ab --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ +class ParserJSONPathRange : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRange"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRange() = default; +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp new file mode 100644 index 00000000000..86cf793fb52 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp @@ -0,0 +1,27 @@ +#include +#include + +#include + +namespace DB +{ +/** + * + * @param pos token iterator + * @param node node of ASTJSONPathRoot + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::DollarSign) + { + expected.add(pos, "dollar sign (start of jsonpath)"); + return false; + } + node = std::make_shared(); + ++pos; + return true; +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h b/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h new file mode 100644 index 00000000000..59fed28d63e --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ +class ParserJSONPathRoot : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRoot"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRoot() = default; +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp new file mode 100644 index 00000000000..1338a2064f1 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp @@ -0,0 +1,31 @@ +#include + +#include + +namespace DB +{ +bool ParserJSONPathStar::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningSquareBracket) + { + return false; + } + ++pos; + if (pos->type != TokenType::Asterisk) + { + return false; + } + ++pos; + if (pos->type != TokenType::ClosingSquareBracket) + { + expected.add(pos, "Closing square bracket"); + return false; + } + ++pos; + + node = std::make_shared(); + + return true; +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h b/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h new file mode 100644 index 00000000000..543823357de --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ +class ParserJSONPathStar : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathStar"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathStar() = default; +}; + +} diff --git a/src/Functions/SimdJSONParser.h b/src/Functions/SimdJSONParser.h index 7ff3c45130d..c5793088baf 100644 --- a/src/Functions/SimdJSONParser.h +++ b/src/Functions/SimdJSONParser.h @@ -50,6 +50,8 @@ struct SimdJSONParser ALWAYS_INLINE Array getArray() const; ALWAYS_INLINE Object getObject() const; + ALWAYS_INLINE simdjson::dom::element getElement() const { return element; } + private: simdjson::dom::element element; }; diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index b77e873b7aa..29343a871a8 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -40,6 +40,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsIntrospection(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); +void registerFunctionsSQLJSON(FunctionFactory &); void registerFunctionToJSONString(FunctionFactory &); void registerFunctionsConsistentHashing(FunctionFactory & factory); void registerFunctionsUnixTimestamp64(FunctionFactory & factory); @@ -99,6 +100,7 @@ void registerFunctions() registerFunctionsGeo(factory); registerFunctionsNull(factory); registerFunctionsJSON(factory); + registerFunctionsSQLJSON(factory); registerFunctionToJSONString(factory); registerFunctionsIntrospection(factory); registerFunctionsConsistentHashing(factory); diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 4d8ddfcff00..d6da7eadd35 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -44,6 +44,7 @@ SRCS( FunctionFile.cpp FunctionHelpers.cpp FunctionJoinGet.cpp + FunctionSQLJSON.cpp FunctionsAES.cpp FunctionsCoding.cpp FunctionsConversion.cpp @@ -76,6 +77,12 @@ SRCS( GatherUtils/sliceFromRightConstantOffsetUnbounded.cpp GeoHash.cpp IFunction.cpp + JSONPath/Parsers/ParserJSONPath.cpp + JSONPath/Parsers/ParserJSONPathMemberAccess.cpp + JSONPath/Parsers/ParserJSONPathQuery.cpp + JSONPath/Parsers/ParserJSONPathRange.cpp + JSONPath/Parsers/ParserJSONPathRoot.cpp + JSONPath/Parsers/ParserJSONPathStar.cpp TargetSpecific.cpp URL/URLHierarchy.cpp URL/URLPathHierarchy.cpp diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 241abbd98de..be956ee705a 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,6 +338,11 @@ Token Lexer::nextTokenImpl() } default: + if (*pos == '$' && ((pos + 1 < end && !isWordCharASCII(pos[1])) || pos + 1 == end)) + { + /// Capture standalone dollar sign + return Token(TokenType::DollarSign, token_begin, ++pos); + } if (isWordCharASCII(*pos) || *pos == '$') { ++pos; diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index 447964355c8..1bcfbb3afb9 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -33,6 +33,7 @@ namespace DB \ M(Asterisk) /** Could be used as multiplication operator or on it's own: "SELECT *" */ \ \ + M(DollarSign) \ M(Plus) \ M(Minus) \ M(Slash) \ diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference new file mode 100644 index 00000000000..593f2fb2d20 --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -0,0 +1,43 @@ +--JSON_VALUE-- + +1 +1.2 +true +"world" +null + + + + +--JSON_QUERY-- +[{"hello":1}] +[1] +[1.2] +[true] +["world"] +[null] +[["world","world2"]] +[{"world":"!"}] + + +[0, 1, 4, 0, -1, -4] +--JSON_EXISTS-- +1 +0 +1 +1 +1 +0 +1 +0 +0 +1 +1 +0 +1 +0 +1 +--MANY ROWS-- +0 ["Vasily", "Kostya"] +1 ["Tihon", "Ernest"] +2 ["Katya", "Anatoliy"] diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql new file mode 100644 index 00000000000..1c5069ccfde --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -0,0 +1,50 @@ +SELECT '--JSON_VALUE--'; +SELECT JSON_VALUE('$', '{"hello":1}'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('$.hello', '{"hello":1}'); +SELECT JSON_VALUE('$.hello', '{"hello":1.2}'); +SELECT JSON_VALUE('$.hello', '{"hello":true}'); +SELECT JSON_VALUE('$.hello', '{"hello":"world"}'); +SELECT JSON_VALUE('$.hello', '{"hello":null}'); +SELECT JSON_VALUE('$.hello', '{"hello":["world","world2"]}'); +SELECT JSON_VALUE('$.hello', '{"hello":{"world":"!"}}'); +SELECT JSON_VALUE('$.hello', '{hello:world}'); -- invalid json => default value (empty string) +SELECT JSON_VALUE('$.hello', ''); + +SELECT '--JSON_QUERY--'; +SELECT JSON_QUERY('$', '{"hello":1}'); +SELECT JSON_QUERY('$.hello', '{"hello":1}'); +SELECT JSON_QUERY('$.hello', '{"hello":1.2}'); +SELECT JSON_QUERY('$.hello', '{"hello":true}'); +SELECT JSON_QUERY('$.hello', '{"hello":"world"}'); +SELECT JSON_QUERY('$.hello', '{"hello":null}'); +SELECT JSON_QUERY('$.hello', '{"hello":["world","world2"]}'); +SELECT JSON_QUERY('$.hello', '{"hello":{"world":"!"}}'); +SELECT JSON_QUERY('$.hello', '{hello:{"world":"!"}}}'); -- invalid json => default value (empty string) +SELECT JSON_QUERY('$.hello', ''); +SELECT JSON_QUERY('$.array[*][0 to 2, 4]', '{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'); + +SELECT '--JSON_EXISTS--'; +SELECT JSON_EXISTS('$', '{"hello":1}'); +SELECT JSON_EXISTS('$', ''); +SELECT JSON_EXISTS('$', '{}'); +SELECT JSON_EXISTS('$.hello', '{"hello":1}'); +SELECT JSON_EXISTS('$.world', '{"hello":1,"world":2}'); +SELECT JSON_EXISTS('$.world', '{"hello":{"world":1}}'); +SELECT JSON_EXISTS('$.hello.world', '{"hello":{"world":1}}'); +SELECT JSON_EXISTS('$.hello', '{hello:world}'); -- invalid json => default value (zero integer) +SELECT JSON_EXISTS('$.hello', ''); +SELECT JSON_EXISTS('$.hello[*]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.hello[0]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.hello[1]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.a[*].b', '{"a":[{"b":1},{"c":2}]}'); +SELECT JSON_EXISTS('$.a[*].f', '{"a":[{"b":1},{"c":2}]}'); +SELECT JSON_EXISTS('$.a[*][0].h', '{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}'); + +SELECT '--MANY ROWS--'; +DROP TABLE IF EXISTS 01889_sql_json; +CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY id; +INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); +SELECT id, JSON_QUERY('$.friends[0 to 2]', json) FROM 01889_sql_json ORDER BY id; +DROP TABLE 01889_sql_json;