mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Merge pull request #54613 from bigo-sg/improve_json_query
Improve json sql functions by serializing json element into column's buffer directly
This commit is contained in:
commit
51851ecc21
@ -8,6 +8,9 @@
|
|||||||
# include <base/defines.h>
|
# include <base/defines.h>
|
||||||
# include <simdjson.h>
|
# include <simdjson.h>
|
||||||
# include "ElementTypes.h"
|
# include "ElementTypes.h"
|
||||||
|
# include <Common/PODArray_fwd.h>
|
||||||
|
# include <Common/PODArray.h>
|
||||||
|
# include <charconv>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -16,6 +19,254 @@ namespace ErrorCodes
|
|||||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Format elements of basic types into string.
|
||||||
|
/// The original implementation is mini_formatter in simdjson.h. But it is not public API, so we
|
||||||
|
/// add a implementation here.
|
||||||
|
class SimdJSONBasicFormatter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
|
||||||
|
inline void comma() { oneChar(','); }
|
||||||
|
/** Start an array, prints [ **/
|
||||||
|
inline void startArray() { oneChar('['); }
|
||||||
|
/** End an array, prints ] **/
|
||||||
|
inline void endArray() { oneChar(']'); }
|
||||||
|
/** Start an array, prints { **/
|
||||||
|
inline void startObject() { oneChar('{'); }
|
||||||
|
/** Start an array, prints } **/
|
||||||
|
inline void endObject() { oneChar('}'); }
|
||||||
|
/** Prints a true **/
|
||||||
|
inline void trueAtom()
|
||||||
|
{
|
||||||
|
const char * s = "true";
|
||||||
|
buffer.insert(s, s + 4);
|
||||||
|
}
|
||||||
|
/** Prints a false **/
|
||||||
|
inline void falseAtom()
|
||||||
|
{
|
||||||
|
const char * s = "false";
|
||||||
|
buffer.insert(s, s + 5);
|
||||||
|
}
|
||||||
|
/** Prints a null **/
|
||||||
|
inline void nullAtom()
|
||||||
|
{
|
||||||
|
const char * s = "null";
|
||||||
|
buffer.insert(s, s + 4);
|
||||||
|
}
|
||||||
|
/** Prints a number **/
|
||||||
|
inline void number(int64_t x)
|
||||||
|
{
|
||||||
|
char number_buffer[24];
|
||||||
|
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||||
|
buffer.insert(number_buffer, res.ptr);
|
||||||
|
}
|
||||||
|
/** Prints a number **/
|
||||||
|
inline void number(uint64_t x)
|
||||||
|
{
|
||||||
|
char number_buffer[24];
|
||||||
|
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||||
|
buffer.insert(number_buffer, res.ptr);
|
||||||
|
}
|
||||||
|
/** Prints a number **/
|
||||||
|
inline void number(double x)
|
||||||
|
{
|
||||||
|
char number_buffer[24];
|
||||||
|
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||||
|
buffer.insert(number_buffer, res.ptr);
|
||||||
|
}
|
||||||
|
/** Prints a key (string + colon) **/
|
||||||
|
inline void key(std::string_view unescaped)
|
||||||
|
{
|
||||||
|
string(unescaped);
|
||||||
|
oneChar(':');
|
||||||
|
}
|
||||||
|
/** Prints a string. The string is escaped as needed. **/
|
||||||
|
inline void string(std::string_view unescaped)
|
||||||
|
{
|
||||||
|
oneChar('\"');
|
||||||
|
size_t i = 0;
|
||||||
|
// Fast path for the case where we have no control character, no ", and no backslash.
|
||||||
|
// This should include most keys.
|
||||||
|
//
|
||||||
|
// We would like to use 'bool' but some compilers take offense to bitwise operation
|
||||||
|
// with bool types.
|
||||||
|
constexpr static char needs_escaping[] = {
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
for (; i + 8 <= unescaped.length(); i += 8)
|
||||||
|
{
|
||||||
|
// Poor's man vectorization. This could get much faster if we used SIMD.
|
||||||
|
//
|
||||||
|
// It is not the case that replacing '|' with '||' would be neutral performance-wise.
|
||||||
|
if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
|
||||||
|
| needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
|
||||||
|
| needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
|
||||||
|
| needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])])
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (; i < unescaped.length(); i++)
|
||||||
|
{
|
||||||
|
if (needs_escaping[uint8_t(unescaped[i])])
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The following is also possible and omits a 256-byte table, but it is slower:
|
||||||
|
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
|
||||||
|
// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
|
||||||
|
|
||||||
|
// At least for long strings, the following should be fast. We could
|
||||||
|
// do better by integrating the checks and the insertion.
|
||||||
|
buffer.insert(unescaped.data(), unescaped.data() + i);
|
||||||
|
// We caught a control character if we enter this loop (slow).
|
||||||
|
// Note that we are do not restart from the beginning, but rather we continue
|
||||||
|
// from the point where we encountered something that requires escaping.
|
||||||
|
for (; i < unescaped.length(); i++)
|
||||||
|
{
|
||||||
|
switch (unescaped[i])
|
||||||
|
{
|
||||||
|
case '\"': {
|
||||||
|
const char * s = "\\\"";
|
||||||
|
buffer.insert(s, s + 2);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '\\': {
|
||||||
|
const char * s = "\\\\";
|
||||||
|
buffer.insert(s, s + 2);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (uint8_t(unescaped[i]) <= 0x1F)
|
||||||
|
{
|
||||||
|
// If packed, this uses 8 * 32 bytes.
|
||||||
|
// Note that we expect most compilers to embed this code in the data
|
||||||
|
// section.
|
||||||
|
constexpr static simdjson::escape_sequence escaped[32] = {
|
||||||
|
{6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"},
|
||||||
|
{6, "\\u0007"}, {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, {2, "\\f"}, {2, "\\r"},
|
||||||
|
{6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"},
|
||||||
|
{6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
|
||||||
|
{6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
|
||||||
|
auto u = escaped[uint8_t(unescaped[i])];
|
||||||
|
buffer.insert(u.string, u.string + u.length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oneChar(unescaped[i]);
|
||||||
|
}
|
||||||
|
} // switch
|
||||||
|
} // for
|
||||||
|
oneChar('\"');
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void oneChar(char c)
|
||||||
|
{
|
||||||
|
buffer.push_back(c);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
PaddedPODArray<UInt8> & buffer;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Format object elements into string, element, array, object, kv-pair.
|
||||||
|
/// Similar to string_builder in simdjson.h.
|
||||||
|
class SimdJSONElementFormatter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
|
||||||
|
/** Append an element to the builder (to be printed) **/
|
||||||
|
inline void append(simdjson::dom::element value)
|
||||||
|
{
|
||||||
|
switch (value.type())
|
||||||
|
{
|
||||||
|
case simdjson::dom::element_type::UINT64: {
|
||||||
|
format.number(value.get_uint64().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::INT64: {
|
||||||
|
format.number(value.get_int64().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::DOUBLE: {
|
||||||
|
format.number(value.get_double().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::STRING: {
|
||||||
|
format.string(value.get_string().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::BOOL: {
|
||||||
|
if (value.get_bool().value_unsafe())
|
||||||
|
format.trueAtom();
|
||||||
|
else
|
||||||
|
format.falseAtom();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::NULL_VALUE: {
|
||||||
|
format.nullAtom();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::ARRAY: {
|
||||||
|
append(value.get_array().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case simdjson::dom::element_type::OBJECT: {
|
||||||
|
append(value.get_object().value_unsafe());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/** Append an array to the builder (to be printed) **/
|
||||||
|
inline void append(simdjson::dom::array value)
|
||||||
|
{
|
||||||
|
format.startArray();
|
||||||
|
auto iter = value.begin();
|
||||||
|
auto end = value.end();
|
||||||
|
if (iter != end)
|
||||||
|
{
|
||||||
|
append(*iter);
|
||||||
|
for (++iter; iter != end; ++iter)
|
||||||
|
{
|
||||||
|
format.comma();
|
||||||
|
append(*iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format.endArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void append(simdjson::dom::object value)
|
||||||
|
{
|
||||||
|
format.startObject();
|
||||||
|
auto pair = value.begin();
|
||||||
|
auto end = value.end();
|
||||||
|
if (pair != end)
|
||||||
|
{
|
||||||
|
append(*pair);
|
||||||
|
for (++pair; pair != end; ++pair)
|
||||||
|
{
|
||||||
|
format.comma();
|
||||||
|
append(*pair);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void append(simdjson::dom::key_value_pair kv)
|
||||||
|
{
|
||||||
|
format.key(kv.key);
|
||||||
|
append(kv.value);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
SimdJSONBasicFormatter format;
|
||||||
|
};
|
||||||
|
|
||||||
/// This class can be used as an argument for the template class FunctionJSON.
|
/// This class can be used as an argument for the template class FunctionJSON.
|
||||||
/// It provides ability to parse JSONs using simdjson library.
|
/// It provides ability to parse JSONs using simdjson library.
|
||||||
struct SimdJSONParser
|
struct SimdJSONParser
|
||||||
|
@ -35,10 +35,92 @@ extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
|||||||
extern const int BAD_ARGUMENTS;
|
extern const int BAD_ARGUMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Have implemented the operator << for json elements. So we could use stringstream to serialize json elements.
|
||||||
|
/// But stingstream have bad performance, not recommend to use it.
|
||||||
|
template <typename Element>
|
||||||
|
class DefaultJSONStringSerializer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit DefaultJSONStringSerializer(ColumnString & col_str_) : col_str(col_str_) { }
|
||||||
|
|
||||||
|
inline void addRawData(const char * ptr, size_t len)
|
||||||
|
{
|
||||||
|
out << std::string_view(ptr, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void addRawString(std::string_view str)
|
||||||
|
{
|
||||||
|
out << str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// serialize the json element into stringstream
|
||||||
|
inline void addElement(const Element & element)
|
||||||
|
{
|
||||||
|
out << element.getElement();
|
||||||
|
}
|
||||||
|
inline void commit()
|
||||||
|
{
|
||||||
|
auto out_str = out.str();
|
||||||
|
col_str.insertData(out_str.data(), out_str.size());
|
||||||
|
}
|
||||||
|
inline void rollback() {}
|
||||||
|
private:
|
||||||
|
ColumnString & col_str;
|
||||||
|
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A more efficient way to serialize json elements into destination column.
|
||||||
|
/// Formatter takes the chars buffer in the ColumnString and put data into it directly.
|
||||||
|
template<typename Element, typename Formatter>
|
||||||
|
class JSONStringSerializer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit JSONStringSerializer(ColumnString & col_str_)
|
||||||
|
: col_str(col_str_), chars(col_str_.getChars()), offsets(col_str_.getOffsets()), formatter(col_str_.getChars())
|
||||||
|
{
|
||||||
|
prev_offset = offsets.empty() ? 0 : offsets.back();
|
||||||
|
}
|
||||||
|
/// Put the data into column's buffer directly.
|
||||||
|
inline void addRawData(const char * ptr, size_t len)
|
||||||
|
{
|
||||||
|
chars.insert(ptr, ptr + len);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void addRawString(std::string_view str)
|
||||||
|
{
|
||||||
|
chars.insert(str.data(), str.data() + str.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// serialize the json element into column's buffer directly
|
||||||
|
inline void addElement(const Element & element)
|
||||||
|
{
|
||||||
|
formatter.append(element.getElement());
|
||||||
|
}
|
||||||
|
inline void commit()
|
||||||
|
{
|
||||||
|
chars.push_back(0);
|
||||||
|
offsets.push_back(chars.size());
|
||||||
|
}
|
||||||
|
inline void rollback()
|
||||||
|
{
|
||||||
|
chars.resize(prev_offset);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
ColumnString & col_str;
|
||||||
|
ColumnString::Chars & chars;
|
||||||
|
IColumn::Offsets & offsets;
|
||||||
|
Formatter formatter;
|
||||||
|
size_t prev_offset;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Empty placeholder type with no members.
/// NOTE(review): presumably meant as the serializer template argument for implementations
/// that never serialize JSON into a string column - confirm against its users.
class EmptyJSONStringSerializer{};
|
||||||
|
|
||||||
|
|
||||||
class FunctionSQLJSONHelpers
|
class FunctionSQLJSONHelpers
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
template <typename Name, template <typename> typename Impl, class JSONParser>
|
template <typename Name, typename Impl, class JSONParser>
|
||||||
class Executor
|
class Executor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -116,7 +198,7 @@ public:
|
|||||||
bool document_ok = false;
|
bool document_ok = false;
|
||||||
|
|
||||||
/// Parse JSON for every row
|
/// Parse JSON for every row
|
||||||
Impl<JSONParser> impl;
|
Impl impl;
|
||||||
for (const auto i : collections::range(0, input_rows_count))
|
for (const auto i : collections::range(0, input_rows_count))
|
||||||
{
|
{
|
||||||
std::string_view json{
|
std::string_view json{
|
||||||
@ -138,7 +220,7 @@ public:
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Name, template <typename> typename Impl>
|
template <typename Name, template <typename, typename> typename Impl>
|
||||||
class FunctionSQLJSON : public IFunction, WithConstContext
|
class FunctionSQLJSON : public IFunction, WithConstContext
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -155,7 +237,8 @@ public:
|
|||||||
|
|
||||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||||
{
|
{
|
||||||
return Impl<DummyJSONParser>::getReturnType(Name::name, arguments, getContext());
|
return Impl<DummyJSONParser, DefaultJSONStringSerializer<DummyJSONParser::Element>>::getReturnType(
|
||||||
|
Name::name, arguments, getContext());
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||||
@ -168,9 +251,14 @@ public:
|
|||||||
unsigned parse_depth = static_cast<unsigned>(getContext()->getSettingsRef().max_parser_depth);
|
unsigned parse_depth = static_cast<unsigned>(getContext()->getSettingsRef().max_parser_depth);
|
||||||
#if USE_SIMDJSON
|
#if USE_SIMDJSON
|
||||||
if (getContext()->getSettingsRef().allow_simdjson)
|
if (getContext()->getSettingsRef().allow_simdjson)
|
||||||
return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
return FunctionSQLJSONHelpers::Executor<
|
||||||
|
Name,
|
||||||
|
Impl<SimdJSONParser, JSONStringSerializer<SimdJSONParser::Element, SimdJSONElementFormatter>>,
|
||||||
|
SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||||
#endif
|
#endif
|
||||||
return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
return FunctionSQLJSONHelpers::
|
||||||
|
Executor<Name, Impl<DummyJSONParser, DefaultJSONStringSerializer<DummyJSONParser::Element>>, DummyJSONParser>::run(
|
||||||
|
arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -189,7 +277,7 @@ struct NameJSONQuery
|
|||||||
static constexpr auto name{"JSON_QUERY"};
|
static constexpr auto name{"JSON_QUERY"};
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename JSONParser>
|
template <typename JSONParser, typename JSONStringSerializer>
|
||||||
class JSONExistsImpl
|
class JSONExistsImpl
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -228,7 +316,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename JSONParser>
|
template <typename JSONParser, typename JSONStringSerializer>
|
||||||
class JSONValueImpl
|
class JSONValueImpl
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -279,11 +367,7 @@ public:
|
|||||||
|
|
||||||
if (status == VisitorStatus::Exhausted)
|
if (status == VisitorStatus::Exhausted)
|
||||||
return false;
|
return false;
|
||||||
|
ColumnString * col_str = nullptr;
|
||||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
|
||||||
out << current_element.getElement();
|
|
||||||
auto output_str = out.str();
|
|
||||||
ColumnString * col_str;
|
|
||||||
if (isColumnNullable(dest))
|
if (isColumnNullable(dest))
|
||||||
{
|
{
|
||||||
ColumnNullable & col_null = assert_cast<ColumnNullable &>(dest);
|
ColumnNullable & col_null = assert_cast<ColumnNullable &>(dest);
|
||||||
@ -294,20 +378,15 @@ public:
|
|||||||
{
|
{
|
||||||
col_str = assert_cast<ColumnString *>(&dest);
|
col_str = assert_cast<ColumnString *>(&dest);
|
||||||
}
|
}
|
||||||
ColumnString::Chars & data = col_str->getChars();
|
JSONStringSerializer json_serializer(*col_str);
|
||||||
ColumnString::Offsets & offsets = col_str->getOffsets();
|
|
||||||
|
|
||||||
if (current_element.isString())
|
if (current_element.isString())
|
||||||
{
|
{
|
||||||
ReadBufferFromString buf(output_str);
|
auto str = current_element.getString();
|
||||||
readJSONStringInto(data, buf);
|
json_serializer.addRawString(str);
|
||||||
data.push_back(0);
|
|
||||||
offsets.push_back(data.size());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
json_serializer.addElement(current_element);
|
||||||
col_str->insertData(output_str.data(), output_str.size());
|
json_serializer.commit();
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -316,7 +395,7 @@ public:
|
|||||||
* Function to test jsonpath member access, will be removed in final PR
|
* Function to test jsonpath member access, will be removed in final PR
|
||||||
* @tparam JSONParser parser
|
* @tparam JSONParser parser
|
||||||
*/
|
*/
|
||||||
template <typename JSONParser>
|
template <typename JSONParser, typename JSONStringSerializer>
|
||||||
class JSONQueryImpl
|
class JSONQueryImpl
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -328,23 +407,27 @@ public:
|
|||||||
|
|
||||||
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
|
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
|
||||||
{
|
{
|
||||||
|
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
||||||
|
|
||||||
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
||||||
Element current_element = root;
|
Element current_element = root;
|
||||||
VisitorStatus status;
|
VisitorStatus status;
|
||||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
|
||||||
/// Create json array of results: [res1, res2, ...]
|
|
||||||
out << "[";
|
|
||||||
bool success = false;
|
bool success = false;
|
||||||
|
const char * array_begin = "[";
|
||||||
|
const char * array_end = "]";
|
||||||
|
const char * comma = ", ";
|
||||||
|
JSONStringSerializer json_serializer(col_str);
|
||||||
|
json_serializer.addRawData(array_begin, 1);
|
||||||
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
||||||
{
|
{
|
||||||
if (status == VisitorStatus::Ok)
|
if (status == VisitorStatus::Ok)
|
||||||
{
|
{
|
||||||
if (success)
|
if (success)
|
||||||
{
|
{
|
||||||
out << ", ";
|
json_serializer.addRawData(comma, 2);
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
out << current_element.getElement();
|
json_serializer.addElement(current_element);
|
||||||
}
|
}
|
||||||
else if (status == VisitorStatus::Error)
|
else if (status == VisitorStatus::Error)
|
||||||
{
|
{
|
||||||
@ -354,14 +437,13 @@ public:
|
|||||||
}
|
}
|
||||||
current_element = root;
|
current_element = root;
|
||||||
}
|
}
|
||||||
out << "]";
|
|
||||||
if (!success)
|
if (!success)
|
||||||
{
|
{
|
||||||
|
json_serializer.rollback();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
json_serializer.addRawData(array_end, 1);
|
||||||
auto output_str = out.str();
|
json_serializer.commit();
|
||||||
col_str.insertData(output_str.data(), output_str.size());
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -33,4 +33,8 @@
|
|||||||
<query>SELECT 'simdjson-12', count() FROM zeros(5000000) WHERE NOT ignore(JSONExtractFloat(materialize({long_json}), 'fparam', 'nested_2', -2))</query>
|
<query>SELECT 'simdjson-12', count() FROM zeros(5000000) WHERE NOT ignore(JSONExtractFloat(materialize({long_json}), 'fparam', 'nested_2', -2))</query>
|
||||||
<query>SELECT 'simdjson-13', count() FROM zeros(5000000) WHERE NOT ignore(JSONExtractBool(materialize({long_json}), 'bparam'))</query>
|
<query>SELECT 'simdjson-13', count() FROM zeros(5000000) WHERE NOT ignore(JSONExtractBool(materialize({long_json}), 'bparam'))</query>
|
||||||
|
|
||||||
|
<query>SELECT 'simdjson-14', count() FROM zeros(5000000) WHERE NOT ignore(JSON_VALUE(materialize({long_json}), '$.nparam'))</query>
|
||||||
|
<query>SELECT 'simdjson-15', count() FROM zeros(5000000) WHERE NOT ignore(JSON_QUERY(materialize({long_json}), '$.nparam'))</query>
|
||||||
|
<query>SELECT 'simdjson-16', count() FROM zeros(5000000) WHERE NOT ignore(JSON_VALUE(materialize({json}), '$.nparam'))</query>
|
||||||
|
<query>SELECT 'simdjson-17', count() FROM zeros(5000000) WHERE NOT ignore(JSON_QUERY(materialize({json}), '$.nparam'))</query>
|
||||||
</test>
|
</test>
|
||||||
|
Loading…
Reference in New Issue
Block a user