diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index 0d340f6cd73..acf853b6c83 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -275,8 +275,9 @@ struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; template @@ -531,148 +532,12 @@ public: }; +/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. template -class JSONExtractRawImpl +struct JSONExtractTree { -public: - static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - using Iterator = typename JSONParser::Iterator; - static bool addValueToColumn(IColumn & dest, const Iterator & it) - { - ColumnString & col_str = static_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, WriteBufferFromVector::AppendModeTag()); - traverse(it, buf); - buf.finish(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); - return true; - } - static constexpr size_t num_extra_arguments = 0; - static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {} - -private: - static void traverse(const Iterator & it, WriteBuffer & buf) - { - if (JSONParser::isInteger(it)) - { - writeIntText(JSONParser::getInteger(it), buf); - return; - } - if (JSONParser::isFloat(it)) - { - writeFloatText(JSONParser::getFloat(it), buf); - return; - } - if (JSONParser::isBool(it)) - { - if (JSONParser::getBool(it)) - writeCString("true", buf); - else - writeCString("false", buf); - return; - } - if (JSONParser::isString(it)) - { - writeJSONString(JSONParser::getString(it), buf, format_settings()); - return; - } - if (JSONParser::isArray(it)) - { - writeChar('[', buf); - Iterator it2 = it; - if (JSONParser::downToArray(it2)) - { - traverse(it2, buf); - while (JSONParser::next(it2)) - { - writeChar(',', buf); - traverse(it2, buf); - } - } - writeChar(']', buf); - return; - } - if (JSONParser::isObject(it)) - { - writeChar('{', buf); - Iterator it2 = it; - StringRef key; - if (JSONParser::downToObject(it2, key)) - { - writeJSONString(key, buf, format_settings()); - writeChar(':', buf); - traverse(it2, buf); - while (JSONParser::nextKeyValue(it2, key)) - { - writeChar(',', buf); - writeJSONString(key, buf, format_settings()); - writeChar(':', buf); - traverse(it2, buf); - } - } - writeChar('}', buf); - return; - } - if (JSONParser::isNull(it)) - { - writeCString("null", buf); - return; - } - } - - static const FormatSettings & format_settings() - { - static const FormatSettings the_instance = [] - { - FormatSettings settings; - settings.json.escape_forward_slashes = false; - return settings; - }(); - return the_instance; - } -}; - - -template -class JSONExtractImpl -{ -public: - static constexpr size_t num_extra_arguments = 1; - - static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception{"Function " + String(function_name) + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - - const auto & col = arguments.back(); - auto col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception{"The last argument of function " + String(function_name) - + " should be a constant string specifying the data type, illegal value: " + col.column->getName(), - ErrorCodes::ILLEGAL_COLUMN}; - - return DataTypeFactory::instance().get(col_type_const->getValue()); - } - - void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos) - { - extract_tree = buildExtractTree(function_name, block.getByPosition(result_pos).type); - } - - using Iterator = typename JSONParser::Iterator; - bool addValueToColumn(IColumn & dest, const Iterator & it) - { - return extract_tree->addValueToColumn(dest, it); - } - -private: - /// Node of the extract tree. We need a tree to extract complex values containing array, tuples or nullables. class Node { public: @@ -923,7 +788,7 @@ private: std::unordered_map name_to_index_map; }; - std::unique_ptr buildExtractTree(const char * function_name, const DataTypePtr & type) + static std::unique_ptr build(const char * function_name, const DataTypePtr & type) { switch (type->getTypeId()) { @@ -945,13 +810,11 @@ private: return std::make_unique>(static_cast(*type).getValues()); case TypeIndex::Nullable: { - return std::make_unique( - buildExtractTree(function_name, static_cast(*type).getNestedType())); + return std::make_unique(build(function_name, static_cast(*type).getNestedType())); } case TypeIndex::Array: { - return std::make_unique( - buildExtractTree(function_name, static_cast(*type).getNestedType())); + return std::make_unique(build(function_name, static_cast(*type).getNestedType())); } case TypeIndex::Tuple: { @@ -959,15 +822,225 @@ private: const auto & tuple_elements = tuple.getElements(); std::vector> elements; for (const auto & tuple_element : tuple_elements) - elements.emplace_back(buildExtractTree(function_name, tuple_element)); + elements.emplace_back(build(function_name, tuple_element)); return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); } default: throw Exception{"Function " + String(function_name) + " doesn't support the return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } } +}; - std::unique_ptr extract_tree; +template +class JSONExtractImpl +{ +public: + static constexpr size_t num_extra_arguments = 1; + + static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception{"Function " + String(function_name) + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + const auto & col = arguments.back(); + auto col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception{"The last argument of function " + String(function_name) + + " should be a constant string specifying the return data type, illegal value: " + col.column->getName(), + ErrorCodes::ILLEGAL_COLUMN}; + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos) + { + extract_tree = JSONExtractTree::build(function_name, block.getByPosition(result_pos).type); + } + + using Iterator = typename JSONParser::Iterator; + bool addValueToColumn(IColumn & dest, const Iterator & it) + { + return extract_tree->addValueToColumn(dest, it); + } + +protected: + std::unique_ptr::Node> extract_tree; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + static constexpr size_t num_extra_arguments = 1; + + static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception{"Function " + String(function_name) + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + const auto & col = arguments.back(); + auto col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception{"The last argument of function " + String(function_name) + + " should be a constant string specifying the values' data type, illegal value: " + col.column->getName(), + ErrorCodes::ILLEGAL_COLUMN}; + + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr key_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos) + { + const auto & result_type = block.getByPosition(result_pos).type; + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = JSONExtractTree::build(function_name, value_type); + } + + using Iterator = typename JSONParser::Iterator; + bool addValueToColumn(IColumn & dest, const Iterator & it) + { + if (!JSONParser::isObject(it)) + return false; + + auto & col_arr = static_cast(dest); + auto & col_tuple = static_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = static_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + StringRef key; + Iterator it2 = it; + if (!JSONParser::downToObject(it2, key)) + return false; + + do + { + if (extract_tree->addValueToColumn(col_value, it2)) + col_key.insertData(key.data, key.size); + } + while (JSONParser::nextKeyValue(it2, key)); + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr::Node> extract_tree; +}; + + +template +class JSONExtractRawImpl +{ +public: + static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + using Iterator = typename JSONParser::Iterator; + static bool addValueToColumn(IColumn & dest, const Iterator & it) + { + ColumnString & col_str = static_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, WriteBufferFromVector::AppendModeTag()); + traverse(it, buf); + buf.finish(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + return true; + } + + static constexpr size_t num_extra_arguments = 0; + static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {} + +private: + static void traverse(const Iterator & it, WriteBuffer & buf) + { + if (JSONParser::isInteger(it)) + { + writeIntText(JSONParser::getInteger(it), buf); + return; + } + if (JSONParser::isFloat(it)) + { + writeFloatText(JSONParser::getFloat(it), buf); + return; + } + if (JSONParser::isBool(it)) + { + if (JSONParser::getBool(it)) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (JSONParser::isString(it)) + { + writeJSONString(JSONParser::getString(it), buf, format_settings()); + return; + } + if (JSONParser::isArray(it)) + { + writeChar('[', buf); + Iterator it2 = it; + if (JSONParser::downToArray(it2)) + { + traverse(it2, buf); + while (JSONParser::next(it2)) + { + writeChar(',', buf); + traverse(it2, buf); + } + } + writeChar(']', buf); + return; + } + if (JSONParser::isObject(it)) + { + writeChar('{', buf); + Iterator it2 = it; + StringRef key; + if (JSONParser::downToObject(it2, key)) + { + writeJSONString(key, buf, format_settings()); + writeChar(':', buf); + traverse(it2, buf); + while (JSONParser::nextKeyValue(it2, key)) + { + writeChar(',', buf); + writeJSONString(key, buf, format_settings()); + writeChar(':', buf); + traverse(it2, buf); + } + } + writeChar('}', buf); + return; + } + if (JSONParser::isNull(it)) + { + writeCString("null", buf); + return; + } + } + + static const FormatSettings & format_settings() + { + static const FormatSettings the_instance = [] + { + FormatSettings settings; + settings.json.escape_forward_slashes = false; + return settings; + }(); + return the_instance; + } }; @@ -983,8 +1056,9 @@ void registerFunctionsJSONTemplate(FunctionFactory & factory) factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); - factory.registerFunction>(); factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference b/dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference index 8d98d3202db..86200f12129 100644 --- a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference +++ b/dbms/tests/queries/0_stateless/00918_json_functions_avx2.reference @@ -50,3 +50,8 @@ Friday {"passed":true} {} +--JSONExtractKeysAndValues-- +[('a','hello')] +[('b',[-100,200,300])] +[('a','hello'),('b','world')] +[('a',5),('b',7),('c',11)] diff --git a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql b/dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql index 5654b3528a5..4afb7d84cff 100644 --- a/dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql +++ b/dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql @@ -56,3 +56,9 @@ SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144 SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300], "c":{"d":[121,144]}}', 'c', 'd', 3); SELECT JSONExtractRaw('{"passed": true}'); SELECT JSONExtractRaw('{}'); + +SELECT '--JSONExtractKeysAndValues--'; +SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String'); +SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Float64)'); +SELECT JSONExtractKeysAndValues('{"a": "hello", "b": "world"}', 'String'); +SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8');