add batch visitor

This commit is contained in:
zhanglistar 2024-11-20 15:06:09 +08:00
parent 93464f52f4
commit 7f6dcb854b
11 changed files with 258 additions and 47 deletions

View File

@ -87,7 +87,7 @@ struct DummyJSONParser
static Iterator end() { return {}; } static Iterator end() { return {}; }
static size_t size() { return 0; } static size_t size() { return 0; }
bool find(std::string_view, Element &) const { return false; } /// NOLINT bool find(std::string_view, Element &) const { return false; } /// NOLINT
bool reset() { return true; }
#if 0 #if 0
/// Optional: Provides access to an object's element by index. /// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t) const { return {}; } KeyValuePair operator[](size_t) const { return {}; }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "Common/StackTrace.h"
#include "config.h" #include "config.h"
#if USE_SIMDJSON #if USE_SIMDJSON
@ -590,11 +591,13 @@ struct OnDemandSimdJSONParser
} }
ALWAYS_INLINE Array getArray() const ALWAYS_INLINE Array getArray() const
{ {
return value.get_array().value(); SIMDJSON_ASSIGN_OR_THROW(auto arr, value.get_array());
return arr;
} }
ALWAYS_INLINE Object getObject() const ALWAYS_INLINE Object getObject() const
{ {
return value.get_object().value(); SIMDJSON_ASSIGN_OR_THROW(auto obj, value.get_object());
return obj;
} }
ALWAYS_INLINE simdjson::ondemand::value getElement() const { return value; } ALWAYS_INLINE simdjson::ondemand::value getElement() const { return value; }
@ -610,6 +613,7 @@ struct OnDemandSimdJSONParser
class Iterator class Iterator
{ {
public: public:
Iterator() = default;
ALWAYS_INLINE Iterator(const simdjson::ondemand::array_iterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE Iterator(const simdjson::ondemand::array_iterator & it_) : it(it_) {} /// NOLINT
ALWAYS_INLINE Element operator*() const { return (*it).value(); } ALWAYS_INLINE Element operator*() const { return (*it).value(); }
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
@ -623,9 +627,26 @@ struct OnDemandSimdJSONParser
ALWAYS_INLINE Iterator begin() const { return array.begin().value(); } ALWAYS_INLINE Iterator begin() const { return array.begin().value(); }
ALWAYS_INLINE Iterator end() const { return array.end().value(); } ALWAYS_INLINE Iterator end() const { return array.end().value(); }
ALWAYS_INLINE size_t size() const { return array.count_elements().value(); } ALWAYS_INLINE size_t size() const { return array.count_elements().value(); }
ALWAYS_INLINE Element operator[](size_t index) const { return array.at(index).value(); } ALWAYS_INLINE Element operator[](size_t index) const
{
if (index < last_index)
array.reset();
if (last_index == 0)
{
SIMDJSON_ASSIGN_OR_THROW(auto iter, array.begin());
it = iter;
}
size_t diff = index - last_index;
while (diff--)
++it;
last_index = index;
SIMDJSON_ASSIGN_OR_THROW(auto ele, *it);
return ele;
}
private: private:
mutable size_t last_index{};
mutable simdjson::ondemand::array_iterator it;
mutable simdjson::ondemand::array array; mutable simdjson::ondemand::array array;
}; };
@ -642,8 +663,18 @@ struct OnDemandSimdJSONParser
ALWAYS_INLINE KeyValuePair operator*() const ALWAYS_INLINE KeyValuePair operator*() const
{ {
SIMDJSON_ASSIGN_OR_THROW(auto field_wrapper, *it); //SIMDJSON_ASSIGN_OR_THROW(auto field_wrapper, *it);
SIMDJSON_ASSIGN_OR_THROW(std::string_view key, field_wrapper.unescaped_key()); auto field_wrapper = *it;
if (field_wrapper.error())
{
return {};
}
std::string_view key;
auto key_error = field_wrapper.unescaped_key().get(key);
if (key_error)
{
return {};
}
::simdjson::ondemand::value v = field_wrapper.value(); ::simdjson::ondemand::value v = field_wrapper.value();
return {key, Element(std::move(v))}; return {key, Element(std::move(v))};
} }
@ -674,6 +705,14 @@ struct OnDemandSimdJSONParser
return true; return true;
} }
bool reset()
{
auto v = object.reset();
if (v.error())
return false;
return true;
}
/// Optional: Provides access to an object's element by index. /// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t index) const KeyValuePair operator[](size_t index) const
{ {

View File

@ -102,8 +102,12 @@ public:
} }
/// serialize the json element into column's buffer directly /// serialize the json element into column's buffer directly
void addElement(const Element & element) void addElement(const Element & element, const std::string sep = ", ")
{ {
if (is_first)
is_first = false;
else
addRawData(sep.data(), sep.size());
formatter.append(element.getElement()); formatter.append(element.getElement());
} }
void commit() void commit()
@ -121,7 +125,7 @@ private:
IColumn::Offsets & offsets; IColumn::Offsets & offsets;
Formatter formatter; Formatter formatter;
size_t prev_offset; size_t prev_offset;
bool is_first{true};
}; };
@ -364,7 +368,7 @@ public:
/// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
/// however this functionality is not implemented yet /// however this functionality is not implemented yet
} }
current_element = root; //current_element = root;
} }
if (status == VisitorStatus::Exhausted) if (status == VisitorStatus::Exhausted)
@ -416,19 +420,16 @@ public:
bool success = false; bool success = false;
const char * array_begin = "["; const char * array_begin = "[";
const char * array_end = "]"; const char * array_end = "]";
const char * comma = ", "; //const char * comma = ", ";
JSONStringSerializer json_serializer(col_str); JSONStringSerializer json_serializer(col_str);
json_serializer.addRawData(array_begin, 1); json_serializer.addRawData(array_begin, 1);
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) //std::vector<Element> result{};
std::function<void(const Element&)> result_func= [&json_serializer](const Element & element) { json_serializer.addElement(element); };
while ((status = generator_json_path.getNextItemBatch(current_element, result_func)) != VisitorStatus::Exhausted)
{ {
if (status == VisitorStatus::Ok) if (status == VisitorStatus::Ok)
{ {
if (success)
{
json_serializer.addRawData(comma, 2);
}
success = true; success = true;
json_serializer.addElement(current_element);
} }
else if (status == VisitorStatus::Error) else if (status == VisitorStatus::Error)
{ {
@ -436,7 +437,8 @@ public:
/// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
/// however this functionality is not implemented yet /// however this functionality is not implemented yet
} }
current_element = root; //current_element = root;
//result.clear();
} }
if (!success) if (!success)
{ {

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <utility>
#include <Functions/JSONPath/Generator/IGenerator.h> #include <Functions/JSONPath/Generator/IGenerator.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h> #include <Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathRange.h> #include <Functions/JSONPath/Generator/VisitorJSONPathRange.h>
@ -21,6 +22,7 @@ template <typename JSONParser>
class GeneratorJSONPath : public IGenerator<JSONParser> class GeneratorJSONPath : public IGenerator<JSONParser>
{ {
public: public:
using TElement = typename JSONParser::Element;
/** /**
* Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors
* @param query_ptr_ pointer to ASTJSONPathQuery * @param query_ptr_ pointer to ASTJSONPathQuery
@ -80,11 +82,6 @@ public:
return VisitorStatus::Exhausted; return VisitorStatus::Exhausted;
} }
for (int i = 0; i < current_visitor; ++i)
{
visitors[i]->apply(current);
}
VisitorStatus status = VisitorStatus::Error; VisitorStatus status = VisitorStatus::Error;
for (size_t i = current_visitor; i < visitors.size(); ++i) for (size_t i = current_visitor; i < visitors.size(); ++i)
{ {
@ -105,6 +102,31 @@ public:
} }
} }
VisitorStatus getNextItemBatch(TElement & element, std::function<void(const TElement &)> & res_func) override
{
while (true)
{
/// element passed to us actually is root, so here we assign current to root
auto current = element;
if (current_visitor < 0)
return VisitorStatus::Exhausted;
VisitorStatus status = VisitorStatus::Error;
size_t visitor_size = visitors.size();
for (size_t i = current_visitor; i < visitor_size; ++i)
{
status = visitors[i]->visitBatch(current, res_func, i == visitor_size - 1);
current_visitor = static_cast<int>(i);
if (status == VisitorStatus::Error || status == VisitorStatus::Ignore)
break;
}
updateVisitorsForNextRun();
if (status != VisitorStatus::Ignore)
return status;
}
}
void reinitialize() void reinitialize()
{ {
while (current_visitor >= 0) while (current_visitor >= 0)

View File

@ -10,6 +10,8 @@ template <typename JSONParser>
class IGenerator class IGenerator
{ {
public: public:
using TElement = typename JSONParser::Element;
IGenerator() = default; IGenerator() = default;
virtual const char * getName() const = 0; virtual const char * getName() const = 0;
@ -20,7 +22,9 @@ public:
* @param element to be extracted into * @param element to be extracted into
* @return true if generator is not exhausted * @return true if generator is not exhausted
*/ */
virtual VisitorStatus getNextItem(typename JSONParser::Element & element) = 0; virtual VisitorStatus getNextItem(TElement & element) = 0;
virtual VisitorStatus getNextItemBatch(TElement & element, std::function<void(const TElement &)> & ) = 0;
virtual ~IGenerator() = default; virtual ~IGenerator() = default;
}; };

View File

@ -8,6 +8,7 @@ template <typename JSONParser>
class IVisitor class IVisitor
{ {
public: public:
using TElement = typename JSONParser::Element;
virtual const char * getName() const = 0; virtual const char * getName() const = 0;
/** /**
@ -15,12 +16,13 @@ public:
* @param element simdjson element * @param element simdjson element
*/ */
virtual VisitorStatus visit(typename JSONParser::Element & element) = 0; virtual VisitorStatus visit(typename JSONParser::Element & element) = 0;
virtual VisitorStatus visitBatch(TElement & element, std::function<void(const TElement &)> & , bool ) = 0;
/** /**
* Applies this visitor to document, but does not mutate state * Applies this visitor to document, but does not mutate state
* @param element simdjson element * @param element simdjson element
*/ */
virtual VisitorStatus apply(typename JSONParser::Element & element) const = 0; virtual VisitorStatus apply(typename JSONParser::Element & element) = 0;
/** /**
* Restores visitor's initial state for later use * Restores visitor's initial state for later use

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <optional>
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> #include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/IVisitor.h> #include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h> #include <Functions/JSONPath/Generator/VisitorStatus.h>
@ -10,33 +11,51 @@ template <typename JSONParser>
class VisitorJSONPathMemberAccess : public IVisitor<JSONParser> class VisitorJSONPathMemberAccess : public IVisitor<JSONParser>
{ {
public: public:
using TElement = JSONParser::Element;
explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_)
: member_access_ptr(member_access_ptr_->as<ASTJSONPathMemberAccess>()) { } : member_access_ptr(member_access_ptr_->as<ASTJSONPathMemberAccess>()) { }
const char * getName() const override { return "VisitorJSONPathMemberAccess"; } const char * getName() const override { return "VisitorJSONPathMemberAccess"; }
VisitorStatus apply(typename JSONParser::Element & element) const override VisitorStatus apply(typename JSONParser::Element & element) override
{ {
typename JSONParser::Element result; typename JSONParser::Element result;
element.getObject().find(std::string_view(member_access_ptr->member_name), result); auto obj = element.getObject();
element = result; if (!obj.find(std::string_view(member_access_ptr->member_name), result))
{
return VisitorStatus::Error;
}
element = std::move(result);
return VisitorStatus::Ok; return VisitorStatus::Ok;
} }
VisitorStatus visit(typename JSONParser::Element & element) override VisitorStatus visit(typename JSONParser::Element & element) override
{ {
if (this->isExhausted())
{
return VisitorStatus::Exhausted;
}
this->setExhausted(true); this->setExhausted(true);
if (!element.isObject()) if (!element.isObject())
{ {
return VisitorStatus::Error; return VisitorStatus::Error;
} }
typename JSONParser::Element result; return apply(element);
if (!element.getObject().find(std::string_view(member_access_ptr->member_name), result))
{
return VisitorStatus::Error;
} }
apply(element);
return VisitorStatus::Ok; VisitorStatus visitBatch(TElement & element, std::function<void(const TElement &)> & res_func, bool can_reduce) override
{
if (this->isExhausted())
return VisitorStatus::Exhausted;
this->setExhausted(true);
if (!element.isObject())
return VisitorStatus::Error;
auto status = apply(element);
if (status == VisitorStatus::Ok && can_reduce)
res_func(element);
return status;
} }
void reinitialize() override { this->setExhausted(false); } void reinitialize() override { this->setExhausted(false); }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <optional>
#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h> #include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
#include <Functions/JSONPath/Generator/IVisitor.h> #include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h> #include <Functions/JSONPath/Generator/VisitorStatus.h>
@ -18,23 +19,80 @@ public:
const char * getName() const override { return "VisitorJSONPathRange"; } const char * getName() const override { return "VisitorJSONPathRange"; }
VisitorStatus apply(typename JSONParser::Element & element) const override VisitorStatus apply(typename JSONParser::Element & element) override
{ {
typename JSONParser::Array array = element.getArray(); element = (*array)[current_index];
element = array[current_index];
return VisitorStatus::Ok; return VisitorStatus::Ok;
} }
using TElement = JSONParser::Element;
VisitorStatus visitBatch(TElement & element, std::function<void(const TElement &)> & res_func, bool can_reduce) override
{
if (!array && !element.isArray())
{
this->setExhausted(true);
return VisitorStatus::Error;
}
VisitorStatus status = VisitorStatus::Ok;
if (!array)
{
current_element = element;
array = current_element.getArray();
if (!can_reduce)
array_size = array.value().size();
}
if (can_reduce)
{
std::set<size_t> index_set{};
for (auto range: range_ptr->ranges)
for (size_t i = range.first ; i < range.second; ++i)
index_set.insert(i);
size_t idx = 0;
for (auto item: array.value())
if (index_set.find(idx++) != index_set.end())
res_func(item);
this->setExhausted(true);
status = VisitorStatus::Ok;
}
else
{
if (current_index < array_size.value())
{
apply(element);
status = VisitorStatus::Ok;
}
else
status = VisitorStatus::Ignore;
if (current_index + 1 == range_ptr->ranges[current_range].second
&& current_range + 1 == range_ptr->ranges.size())
{
this->setExhausted(true);
}
}
return status;
}
VisitorStatus visit(typename JSONParser::Element & element) override VisitorStatus visit(typename JSONParser::Element & element) override
{ {
if (!element.isArray()) if (!array && !element.isArray())
{ {
this->setExhausted(true); this->setExhausted(true);
return VisitorStatus::Error; return VisitorStatus::Error;
} }
VisitorStatus status; VisitorStatus status;
if (current_index < element.getArray().size()) if (!array)
{
current_element = element;
array = current_element.getArray();
array_size = array.value().size();
}
if (current_index < array_size.value())
{ {
apply(element); apply(element);
status = VisitorStatus::Ok; status = VisitorStatus::Ok;
@ -58,6 +116,8 @@ public:
current_range = 0; current_range = 0;
current_index = range_ptr->ranges[current_range].first; current_index = range_ptr->ranges[current_range].first;
this->setExhausted(false); this->setExhausted(false);
array_size.reset();
array.reset();
} }
void updateState() override void updateState() override
@ -74,6 +134,9 @@ private:
ASTJSONPathRange * range_ptr; ASTJSONPathRange * range_ptr;
size_t current_range; size_t current_range;
UInt32 current_index; UInt32 current_index;
std::optional<size_t> array_size{};
std::optional<typename JSONParser::Array> array{};
typename JSONParser::Element current_element{};
}; };
} }

View File

@ -10,11 +10,12 @@ template <typename JSONParser>
class VisitorJSONPathRoot : public IVisitor<JSONParser> class VisitorJSONPathRoot : public IVisitor<JSONParser>
{ {
public: public:
using TElement = JSONParser::Element;
explicit VisitorJSONPathRoot(ASTPtr) { } explicit VisitorJSONPathRoot(ASTPtr) { }
const char * getName() const override { return "VisitorJSONPathRoot"; } const char * getName() const override { return "VisitorJSONPathRoot"; }
VisitorStatus apply(typename JSONParser::Element & /*element*/) const override VisitorStatus apply(typename JSONParser::Element & /*element*/) override
{ {
/// No-op on document, since we are already passed document's root /// No-op on document, since we are already passed document's root
return VisitorStatus::Ok; return VisitorStatus::Ok;
@ -27,9 +28,19 @@ public:
return VisitorStatus::Ok; return VisitorStatus::Ok;
} }
VisitorStatus visitBatch(TElement & element, std::function<void(const TElement &)> & res_func, bool can_reduce) override
{
apply(element);
this->setExhausted(true);
if (can_reduce)
res_func(element);
return VisitorStatus::Ok;
}
void reinitialize() override { this->setExhausted(false); } void reinitialize() override { this->setExhausted(false); }
void updateState() override { } void updateState() override { }
}; };
} }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <optional>
#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h> #include <Functions/JSONPath/ASTs/ASTJSONPathStar.h>
#include <Functions/JSONPath/Generator/IVisitor.h> #include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h> #include <Functions/JSONPath/Generator/VisitorStatus.h>
@ -17,23 +18,27 @@ public:
const char * getName() const override { return "VisitorJSONPathStar"; } const char * getName() const override { return "VisitorJSONPathStar"; }
VisitorStatus apply(typename JSONParser::Element & element) const override VisitorStatus apply(typename JSONParser::Element & element) override
{ {
typename JSONParser::Array array = element.getArray(); element = array.value()[current_index];
element = array[current_index];
return VisitorStatus::Ok; return VisitorStatus::Ok;
} }
VisitorStatus visit(typename JSONParser::Element & element) override VisitorStatus visit(typename JSONParser::Element & element) override
{ {
if (!element.isArray()) if (!array && !element.isArray())
{ {
this->setExhausted(true); this->setExhausted(true);
return VisitorStatus::Error; return VisitorStatus::Error;
} }
if (!array_size)
{
array = element.getArray();
array_size = array.value().size();
}
VisitorStatus status; VisitorStatus status;
if (current_index < element.getArray().size()) if (current_index < array_size.value())
{ {
apply(element); apply(element);
status = VisitorStatus::Ok; status = VisitorStatus::Ok;
@ -46,11 +51,53 @@ public:
return status; return status;
} }
using TElement = JSONParser::Element;
VisitorStatus visitBatch(TElement & element, std::function<void(const TElement &)> & res_func, bool can_reduce) override
{
if (!array && !element.isArray())
{
this->setExhausted(true);
return VisitorStatus::Error;
}
if (!array)
{
array = element.getArray();
array_size = array.value().size();
}
VisitorStatus status = VisitorStatus::Ok;
if (can_reduce)
{
for (auto item: array.value())
res_func(item);
this->setExhausted(true);
}
else
{
if (current_index < array_size.value())
{
apply(element);
status = VisitorStatus::Ok;
}
else
{
status = VisitorStatus::Ignore;
this->setExhausted(true);
}
}
return status;
}
void reinitialize() override void reinitialize() override
{ {
current_index = 0; current_index = 0;
this->setExhausted(false); this->setExhausted(false);
array_size.reset();
array.reset();
} }
void updateState() override void updateState() override
@ -59,7 +106,9 @@ public:
} }
private: private:
UInt32 current_index; UInt32 current_index{};
std::optional<typename JSONParser::Array> array{};
std::optional<size_t> array_size{};
}; };
} }