diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 0bea523bf7f..c248c6644cb 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -403,6 +403,8 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. +For input format columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. + ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). @@ -462,15 +464,15 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -478,25 +480,32 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": [0,1,2,3,4] + "num": 42, + "str": "hello", + "arr": [0,1] }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": [0,1,2,3,4] + "num": 43, + "str": "hello", + "arr": [0,1,2] }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": [0,1,2,3,4] + "num": 44, + "str": "hello", + "arr": [0,1,2,3] } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001137687, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -531,15 +540,15 @@ Example: "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -547,100 +556,94 @@ Example: "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": "[0,1,2,3,4]" + "num": "42", + "str": "hello", + "arr": "[0,1]" }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": "[0,1,2,3,4]" + "num": "43", + "str": "hello", + "arr": "[0,1,2]" }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": "[0,1,2,3,4]" + "num": "44", + "str": "hello", + "arr": "[0,1,2,3]" } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001403233, + "rows_read": 3, + "bytes_read": 24 + } } ``` ## JSONColumns {#jsoncolumns} In this format, all data is represented as a single JSON Object. -Note that JSONColumns output format buffers all data in memory to output it as a single block. +Note that JSONColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption. Example: ```json { - "name1": [1, 2, 3, 4], - "name2": ["Hello", ",", "world", "!"], - "name3": [[1, 2], [3, 4], [5, 6], [7, 8]] + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] } ``` Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. -Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} -Differs from JSON output format in that it outputs columns as in JSONColumns format. -This format buffers all data in memory and then outputs them as a single block. +Differs from JSONColumns output format in that it also outputs some metadata and statistics (similar to JSON output format). +This format buffers all data in memory and then outputs them as a single block, so, it can lead to high memory consumption. Example: ```json { - "meta": - [ - { - "name": "sum", - "type": "UInt64" - }, - { - "name": "avg", - "type": "Float64" - } - ], + "meta": + [ + { + "name": "num", + "type": "Int32" + }, + { + "name": "str", + "type": "String" + }, + { + "name": "arr", + "type": "Array(UInt8)" + } + ], - "data": - { - "sum": ["1", "2", "3", "4"], - "avg": [1, 2, 3, 2] - }, + "data": + { + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] + }, - "totals": - { - "sum": "10", - "avg": 2 - }, + "rows": 3, - "extremes": - { - "min": - { - "sum": "1", - "avg": 1 - }, - "max": - { - "sum": "4", - "avg": 3 - } - }, + "rows_before_limit_at_least": 3, - "rows": 4, - - "statistics": - { - "elapsed": 0.003701718, - "rows_read": 5, - "bytes_read": 20 - } + "statistics": + { + "elapsed": 0.000272376, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -696,87 +699,101 @@ Result: Differs from JSON only in that data rows are output in arrays, not in objects. -Example: +Examples: -``` -// JSONCompact +1) JSONCompact: +```json { "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], "data": [ - ["hello", "0", [0,1,2,3,4]], - ["hello", "42", [0,1,2,3,4]], - ["hello", "84", [0,1,2,3,4]] + [42, "hello", [0,1]], + [43, "hello", [0,1,2]], + [44, "hello", [0,1,2,3]] ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001222069, + "rows_read": 3, + "bytes_read": 24 + } } ``` -``` -// JSONCompactStrings +2) JSONCompactStrings +```json { "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], "data": [ - ["hello", "0", "[0,1,2,3,4]"], - ["hello", "42", "[0,1,2,3,4]"], - ["hello", "84", "[0,1,2,3,4]"] + ["42", "hello", "[0,1]"], + ["43", "hello", "[0,1,2]"], + ["44", "hello", "[0,1,2,3]"] ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001572097, + "rows_read": 3, + "bytes_read": 24 + } } ``` ## JSONCompactColumns {#jsoncompactcolumns} In this format, all data is represented as a single JSON Array. -Note that JSONCompactColumns output format buffers all data in memory to output it as a single block. +Note that JSONCompactColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption Example: ```json [ - [1, 2, 3, 4], - ["Hello", ",", "world", "!"], - [[1, 2], [3, 4], [5, 6], [7, 8]] + [42, 43, 44], + ["hello", "hello", "hello"], + [[0,1], [0,1,2], [0,1,2,3]] ] ``` -Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} ## JSONStringsEachRow {#jsonstringseachrow} @@ -793,15 +810,17 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. +In JSONEachRow/JSONStringsEachRow input formats columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. + ## JSONEachRowWithProgress {#jsoneachrowwithprogress} ## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json -{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}} +{"row":{"num":42,"str":"hello","arr":[0,1]}} +{"row":{"num":43,"str":"hello","arr":[0,1,2]}} +{"row":{"num":44,"str":"hello","arr":[0,1,2,3]}} {"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}} ``` @@ -822,11 +841,11 @@ Differs from `JSONCompactStringsEachRow` in that in that it also prints the head Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). ```json -["'hello'", "multiply(42, number)", "range(5)"] -["String", "UInt64", "Array(UInt8)"] -["hello", "0", [0,1,2,3,4]] -["hello", "42", [0,1,2,3,4]] -["hello", "84", [0,1,2,3,4]] +["num", "str", "arr"] +["Int32", "String", "Array(UInt8)"] +[42, "hello", [0,1]] +[43, "hello", [0,1,2]] +[44, "hello", [0,1,2,3]] ``` ### Inserting Data {#inserting-data} diff --git a/src/Core/BlockInfo.cpp b/src/Core/BlockInfo.cpp index ae32e8aa579..81064dec733 100644 --- a/src/Core/BlockInfo.cpp +++ b/src/Core/BlockInfo.cpp @@ -68,7 +68,8 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) void BlockMissingValues::setBits(size_t column_idx, size_t rows) { RowsBitMask & mask = rows_mask_by_column_id[column_idx]; - mask.resize(rows, true); + mask.resize(rows); + std::fill(mask.begin(), mask.end(), true); } const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index 70cd3bb2221..d431303ca39 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -56,7 +56,9 @@ public: const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; /// Check that we have to replace default value at least in one of columns bool hasDefaultBits(size_t column_idx) const; + /// Set bit for a specified row in a single column. void setBit(size_t column_idx, size_t row_idx); + /// Set bits for all rows in a single column. void setBits(size_t column_idx, size_t rows); bool empty() const { return rows_mask_by_column_id.empty(); } size_t size() const { return rows_mask_by_column_id.size(); } diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 146043456bd..e4b655cdcf9 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -452,7 +452,7 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe return buf.eof() ? type : nullptr; } case FormatSettings::EscapingRule::JSON: - return getDataTypeFromJSONField(field); + return JSONUtils::getDataTypeFromField(field); case FormatSettings::EscapingRule::CSV: { if (!format_settings.csv.input_format_use_best_effort_in_schema_inference) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index eb9a78ad734..8ead3e99e46 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -26,566 +26,578 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -static std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) +namespace JSONUtils { - skipWhitespaceIfAny(in); - char * pos = in.position(); - size_t balance = 0; - bool quotes = false; - size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) + template + static std::pair + fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) { - const auto current_object_size = memory.size() + static_cast(pos - in.position()); - if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) - throw ParsingException("Size of JSON object is extremely large. Expected not greater than " + - std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) + - " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA); + skipWhitespaceIfAny(in); - if (quotes) + char * pos = in.position(); + size_t balance = 0; + bool quotes = false; + size_t number_of_rows = 0; + + while (loadAtPosition(in, memory, pos) + && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) { - pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + const auto current_object_size = memory.size() + static_cast(pos - in.position()); + if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) + throw ParsingException( + "Size of JSON object is extremely large. Expected not greater than " + std::to_string(min_chunk_size) + + " bytes, but current is " + std::to_string(current_object_size) + + " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", + ErrorCodes::INCORRECT_DATA); - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - if (*pos == '\\') + if (quotes) { - ++pos; - if (loadAtPosition(in, memory, pos)) + pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + if (*pos == '\\') + { ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + ++pos; + quotes = false; + } } - else if (*pos == '"') + else { - ++pos; - quotes = false; + pos = find_first_symbols(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + else if (*pos == opening_bracket) + { + ++balance; + ++pos; + } + else if (*pos == closing_bracket) + { + --balance; + ++pos; + } + else if (*pos == '\\') + { + ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + quotes = true; + ++pos; + } + + if (balance == 0) + ++number_of_rows; } } - else - { - pos = find_first_symbols(pos, in.buffer().end()); - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - else if (*pos == opening_bracket) - { - ++balance; - ++pos; - } - else if (*pos == closing_bracket) - { - --balance; - ++pos; - } - else if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } - else if (*pos == '"') - { - quotes = true; - ++pos; - } - - if (balance == 0) - ++number_of_rows; - } + saveUpToPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } - saveUpToPosition(in, memory, pos); - return {loadAtPosition(in, memory, pos), number_of_rows}; -} - -template -static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) -{ - Memory memory; - fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1); - return String(memory.data(), memory.size()); -} - -template -DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) -{ - if (field.isNull()) - return nullptr; - - if (field.isBool()) - return DataTypeFactory::instance().get("Nullable(Bool)"); - - if (field.isInt64() || field.isUInt64() || field.isDouble()) - return makeNullable(std::make_shared()); - - if (field.isString()) - return makeNullable(std::make_shared()); - - if (field.isArray()) + template + static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) { - auto array = field.getArray(); + Memory memory; + fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1); + return String(memory.data(), memory.size()); + } - /// Return nullptr in case of empty array because we cannot determine nested type. - if (array.size() == 0) + template + DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) + { + if (field.isNull()) return nullptr; - DataTypes nested_data_types; - /// If this array contains fields with different types we will treat it as Tuple. - bool is_tuple = false; - for (const auto element : array) + if (field.isBool()) + return DataTypeFactory::instance().get("Nullable(Bool)"); + + if (field.isInt64() || field.isUInt64() || field.isDouble()) + return makeNullable(std::make_shared()); + + if (field.isString()) + return makeNullable(std::make_shared()); + + if (field.isArray()) { - auto type = getDataTypeFromJSONFieldImpl(element); - if (!type) + auto array = field.getArray(); + + /// Return nullptr in case of empty array because we cannot determine nested type. + if (array.size() == 0) return nullptr; - if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) - is_tuple = true; + DataTypes nested_data_types; + /// If this array contains fields with different types we will treat it as Tuple. + bool is_tuple = false; + for (const auto element : array) + { + auto type = getDataTypeFromJSONFieldImpl(element); + if (!type) + return nullptr; - nested_data_types.push_back(std::move(type)); + if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) + is_tuple = true; + + nested_data_types.push_back(std::move(type)); + } + + if (is_tuple) + return std::make_shared(nested_data_types); + + return std::make_shared(nested_data_types.back()); } - if (is_tuple) - return std::make_shared(nested_data_types); + if (field.isObject()) + { + auto object = field.getObject(); + DataTypePtr value_type; + bool is_object = false; + for (const auto key_value_pair : object) + { + auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); + if (!type) + continue; - return std::make_shared(nested_data_types.back()); + if (isObject(type)) + { + is_object = true; + break; + } + + if (!value_type) + { + value_type = type; + } + else if (!value_type->equals(*type)) + { + is_object = true; + break; + } + } + + if (is_object) + return std::make_shared("json", true); + + if (value_type) + return std::make_shared(std::make_shared(), value_type); + + return nullptr; + } + + throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; } - if (field.isObject()) + auto getJSONParserAndElement() { - auto object = field.getObject(); - DataTypePtr value_type; - bool is_object = false; - for (const auto key_value_pair : object) +#if USE_SIMDJSON + return std::pair(); +#elif USE_RAPIDJSON + return std::pair(); +#else + return std::pair(); +#endif + } + + DataTypePtr getDataTypeFromJSONField(const String & field) + { + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(field, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); + + return getDataTypeFromJSONFieldImpl(element); + } + + template + static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) + { + String line = readJSONEachRowLineIntoStringImpl(in); + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(line, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); + + auto fields = extractor.extract(element); + + DataTypes data_types; + data_types.reserve(fields.size()); + for (const auto & field : fields) + data_types.push_back(getDataTypeFromJSONFieldImpl(field)); + + /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. + /// Should we try to parse data inside strings somehow in this case? + + return data_types; + } + + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); + } + + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) + { + return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); + } + + struct JSONEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) { - auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); - if (!type) - continue; + /// {..., "" : , ...} - if (isObject(type)) + if (!element.isObject()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); + + auto object = element.getObject(); + std::vector fields; + fields.reserve(object.size()); + column_names.reserve(object.size()); + for (const auto & key_value_pair : object) { - is_object = true; - break; + column_names.emplace_back(key_value_pair.first); + fields.push_back(key_value_pair.second); } - if (!value_type) + return fields; + } + + std::vector column_names; + }; + + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) + { + JSONEachRowFieldsExtractor extractor; + auto data_types + = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + NamesAndTypesList result; + for (size_t i = 0; i != extractor.column_names.size(); ++i) + result.emplace_back(extractor.column_names[i], data_types[i]); + return result; + } + + struct JSONCompactEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) + { + /// [..., , ...] + if (!element.isArray()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); + + auto array = element.getArray(); + std::vector fields; + fields.reserve(array.size()); + for (size_t i = 0; i != array.size(); ++i) + fields.push_back(array[i]); + return fields; + } + }; + + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) + { + JSONCompactEachRowFieldsExtractor extractor; + return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + } + + + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) + { + /// For JSONEachRow we can safely skip whitespace characters + skipWhitespaceIfAny(buf); + return buf.eof() || *buf.position() == '['; + } + + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings) + { + try + { + bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + + if (yield_strings) { - value_type = type; + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + if (as_nullable) + return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); + + serialization->deserializeWholeText(column, buf, format_settings); + return true; } - else if (!value_type->equals(*type)) + + if (as_nullable) + return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); + + serialization->deserializeTextJSON(column, in, format_settings); + return true; + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + column_name + ")"); + throw; + } + } + + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) + { + if (allow_bools_as_numbers) + { + auto not_nullable_first = removeNullable(first); + auto not_nullable_second = removeNullable(second); + /// Check if we have Bool and Number and if so make the result type Number + bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); + bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); + if (bool_type_presents && number_type_presents) { - is_object = true; - break; + if (isBool(not_nullable_first)) + return second; + return first; } } - if (is_object) - return std::make_shared("json", true); + /// If we have Map and Object, make result type Object + bool object_type_presents = isObject(first) || isObject(second); + bool map_type_presents = isMap(first) || isMap(second); + if (object_type_presents && map_type_presents) + { + if (isObject(first)) + return first; + return second; + } - if (value_type) - return std::make_shared(std::make_shared(), value_type); + /// If we have different Maps, make result type Object + if (isMap(first) && isMap(second) && !first->equals(*second)) + return std::make_shared("json", true); return nullptr; } - throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; -} - -auto getJSONParserAndElement() -{ -#if USE_SIMDJSON - return std::pair(); -#elif USE_RAPIDJSON - return std::pair(); -#else - return std::pair(); -#endif -} - -DataTypePtr getDataTypeFromJSONField(const String & field) -{ - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(field, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); - - return getDataTypeFromJSONFieldImpl(element); -} - -template -static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) -{ - String line = readJSONEachRowLineIntoStringImpl(in); - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(line, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); - - auto fields = extractor.extract(element); - - DataTypes data_types; - data_types.reserve(fields.size()); - for (const auto & field : fields) - data_types.push_back(getDataTypeFromJSONFieldImpl(field)); - - /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. - /// Should we try to parse data inside strings somehow in this case? - - return data_types; -} - -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); -} - -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) -{ - return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); -} - -struct JSONEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines) { - /// {..., "" : , ...} - - if (!element.isObject()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); - - auto object = element.getObject(); - std::vector fields; - fields.reserve(object.size()); - column_names.reserve(object.size()); - for (const auto & key_value_pair : object) - { - column_names.emplace_back(key_value_pair.first); - fields.push_back(key_value_pair.second); - } - - return fields; + writeChar(',', out); + writeChar('\n', new_lines, out); } - std::vector column_names; -}; + void writeFieldCompactDelimiter(WriteBuffer & out) { writeCString(", ", out); } -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) -{ - JSONEachRowFieldsExtractor extractor; - auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); - NamesAndTypesList result; - for (size_t i = 0; i != extractor.column_names.size(); ++i) - result.emplace_back(extractor.column_names[i], data_types[i]); - return result; -} - -struct JSONCompactEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) + template + void writeTitle(const char * title, WriteBuffer & out, size_t indent) { - /// [..., , ...] - if (!element.isArray()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); - - auto array = element.getArray(); - std::vector fields; - fields.reserve(array.size()); - for (size_t i = 0; i != array.size(); ++i) - fields.push_back(array[i]); - return fields; + writeChar('\t', indent, out); + writeChar('"', out); + writeCString(title, out); + if constexpr (with_space) + writeCString("\": ", out); + else + writeCString("\":\n", out); } -}; -DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) -{ - JSONCompactEachRowFieldsExtractor extractor; - return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); -} - - -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) -{ - /// For JSONEachRow we can safely skip whitespace characters - skipWhitespaceIfAny(buf); - return buf.eof() || *buf.position() == '['; -} - -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings) -{ - try + void writeObjectStart(WriteBuffer & out, size_t indent, const char * title) { - bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("{\n", out); + } + + void writeObjectEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar('}', out); + } + + void writeArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("[\n", out); + } + + void writeCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + else + writeChar('\t', indent, out); + writeCString("[", out); + } + + void writeArrayEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar(']', out); + } + + void writeCompactArrayEnd(WriteBuffer & out) { writeChar(']', out); } + + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name, + size_t indent) + { + if (name.has_value()) + writeTitle(name->data(), out, indent); if (yield_strings) { - String str; - readJSONString(str, in); + WriteBufferFromOwnString buf; - ReadBufferFromString buf(str); - - if (as_nullable) - return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); - - serialization->deserializeWholeText(column, buf, format_settings); - return true; + serialization.serializeText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); } - - if (as_nullable) - return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); - - serialization->deserializeTextJSON(column, in, format_settings); - return true; + else + serialization.serializeTextJSON(column, row_num, out, settings); } - catch (Exception & e) - { - e.addMessage("(while reading the value of key " + column_name + ")"); - throw; - } -} -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) -{ - if (allow_bools_as_numbers) + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent) { - auto not_nullable_first = removeNullable(first); - auto not_nullable_second = removeNullable(second); - /// Check if we have Bool and Number and if so make the result type Number - bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); - bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); - if (bool_type_presents && number_type_presents) + for (size_t i = 0; i < columns.size(); ++i) { - if (isBool(not_nullable_first)) - return second; - return first; + if (i != 0) + writeFieldDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); } } - /// If we have Map and Object, make result type Object - bool object_type_presents = isObject(first) || isObject(second); - bool map_type_presents = isMap(first) || isMap(second); - if (object_type_presents && map_type_presents) + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out) { - if (isObject(first)) - return first; - return second; - } - - /// If we have different Maps, make result type Object - if (isMap(first) && isMap(second) && !first->equals(*second)) - return std::make_shared("json", true); - - return nullptr; -} - -void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines) -{ - writeChar(',', out); - writeChar('\n', new_lines, out); -} - -void writeJSONFieldCompactDelimiter(WriteBuffer & out) -{ - writeCString(", ", out); -} - -template -void writeJSONTitle(const char * title, WriteBuffer & out, size_t indent) -{ - writeChar('\t', indent, out); - writeChar('"', out); - writeCString(title, out); - if constexpr (with_space) - writeCString("\": ", out); - else - writeCString("\":\n", out); -} - -void writeJSONObjectStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - writeChar('\t', indent, out); - writeCString("{\n", out); -} - -void writeJSONObjectEnd(WriteBuffer & out, size_t indent) -{ - writeChar('\n', out); - writeChar('\t', indent, out); - writeChar('}', out); -} - -void writeJSONArrayStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - writeChar('\t', indent, out); - writeCString("[\n", out); -} - -void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - else - writeChar('\t', indent, out); - writeCString("[", out); -} - -void writeJSONArrayEnd(WriteBuffer & out, size_t indent) -{ - writeChar('\n', out); - writeChar('\t', indent, out); - writeChar(']', out); -} - -void writeJSONCompactArrayEnd(WriteBuffer & out) -{ - writeChar(']', out); -} - -void writeJSONFieldFromColumn( - const IColumn & column, - const ISerialization & serialization, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - const std::optional & name, - size_t indent) -{ - if (name.has_value()) - writeJSONTitle(name->data(), out, indent); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), out, settings); - } - else - serialization.serializeTextJSON(column, row_num, out, settings); -} - -void writeJSONColumns( - const Columns & columns, - const NamesAndTypes & fields, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - size_t indent) -{ - for (size_t i = 0; i < columns.size(); ++i) - { - if (i != 0) - writeJSONFieldDelimiter(out); - writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); - } -} - -void writeJSONCompactColumns( - const Columns & columns, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out) -{ - for (size_t i = 0; i < columns.size(); ++i) - { - if (i != 0) - writeJSONFieldCompactDelimiter(out); - writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); - } -} - -void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) -{ - writeJSONArrayStart(out, 1, "meta"); - - for (size_t i = 0; i < fields.size(); ++i) - { - writeJSONObjectStart(out, 2); - - writeJSONTitle("name", out, 3); - writeDoubleQuoted(fields[i].name, out); - writeJSONFieldDelimiter(out); - writeJSONTitle("type", out, 3); - writeJSONString(fields[i].type->getName(), out, settings); - writeJSONObjectEnd(out, 2); - - if (i + 1 < fields.size()) - writeJSONFieldDelimiter(out); - } - - writeJSONArrayEnd(out, 1); -} - -void writeJSONAdditionalInfo( - size_t rows, - size_t rows_before_limit, - bool applied_limit, - const Stopwatch & watch, - const Progress & progress, - bool write_statistics, - WriteBuffer & out) -{ - writeJSONFieldDelimiter(out, 2); - writeJSONTitle("rows", out, 1); - writeIntText(rows, out); - - if (applied_limit) - { - writeJSONFieldDelimiter(out, 2); - writeJSONTitle("rows_before_limit_at_least", out, 1); - writeIntText(rows_before_limit, out); - } - - if (write_statistics) - { - writeJSONFieldDelimiter(out, 2); - writeJSONObjectStart(out, 1, "statistics"); - - writeJSONTitle("elapsed", out, 2); - writeText(watch.elapsedSeconds(), out); - writeJSONFieldDelimiter(out); - - writeJSONTitle("rows_read", out, 2); - writeText(progress.read_rows.load(), out); - writeJSONFieldDelimiter(out); - - writeJSONTitle("bytes_read", out, 2); - writeText(progress.read_bytes.load(), out); - - writeJSONObjectEnd(out, 1); - } -} - -void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) -{ - for (auto & field : fields) - { - if (!field.type->textCanContainOnlyValidUTF8()) - need_validate_utf8 = true; - - WriteBufferFromOwnString buf; + for (size_t i = 0; i < columns.size(); ++i) { - WriteBufferValidUTF8 validating_buf(buf); - writeJSONString(field.name, validating_buf, settings); + if (i != 0) + writeFieldCompactDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); } - field.name = buf.str().substr(1, buf.str().size() - 2); } + + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) + { + writeArrayStart(out, 1, "meta"); + + for (size_t i = 0; i < fields.size(); ++i) + { + writeObjectStart(out, 2); + + writeTitle("name", out, 3); + writeDoubleQuoted(fields[i].name, out); + writeFieldDelimiter(out); + writeTitle("type", out, 3); + writeJSONString(fields[i].type->getName(), out, settings); + writeObjectEnd(out, 2); + + if (i + 1 < fields.size()) + writeFieldDelimiter(out); + } + + writeArrayEnd(out, 1); + } + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out) + { + writeFieldDelimiter(out, 2); + writeTitle("rows", out, 1); + writeIntText(rows, out); + + if (applied_limit) + { + writeFieldDelimiter(out, 2); + writeTitle("rows_before_limit_at_least", out, 1); + writeIntText(rows_before_limit, out); + } + + if (write_statistics) + { + writeFieldDelimiter(out, 2); + writeObjectStart(out, 1, "statistics"); + + writeTitle("elapsed", out, 2); + writeText(watch.elapsedSeconds(), out); + writeFieldDelimiter(out); + + writeTitle("rows_read", out, 2); + writeText(progress.read_rows.load(), out); + writeFieldDelimiter(out); + + writeTitle("bytes_read", out, 2); + writeText(progress.read_bytes.load(), out); + + writeObjectEnd(out, 1); + } + } + + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) + { + for (auto & field : fields) + { + if (!field.type->textCanContainOnlyValidUTF8()) + need_validate_utf8 = true; + + WriteBufferFromOwnString buf; + { + WriteBufferValidUTF8 validating_buf(buf); + writeJSONString(field.name, validating_buf, settings); + } + field.name = buf.str().substr(1, buf.str().size() - 2); + } + } + } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index b4b34498311..f2aba3cbcb5 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -13,85 +13,97 @@ namespace DB { -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); +namespace JSONUtils +{ + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); + /// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. + /// JSON array with different nested types is treated as Tuple. + /// If cannot convert (for example when field contains null), return nullptr. + DataTypePtr getDataTypeFromField(const String & field); -/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. -/// JSON array with different nested types is treated as Tuple. -/// If cannot convert (for example when field contains null), return nullptr. -DataTypePtr getDataTypeFromJSONField(const String & field); + /// Read row in JSONEachRow format and try to determine type for each field. + /// Return list of names and types. + /// If cannot determine the type of some field, return nullptr for it. + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); -/// Read row in JSONEachRow format and try to determine type for each field. -/// Return list of names and types. -/// If cannot determine the type of some field, return nullptr for it. -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); + /// Read row in JSONCompactEachRow format and try to determine type for each field. + /// If cannot determine the type of some field, return nullptr for it. + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); -/// Read row in JSONCompactEachRow format and try to determine type for each field. -/// If cannot determine the type of some field, return nullptr for it. -DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings); -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); -void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); + /// Functions helpers for writing JSON data to WriteBuffer. + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); -/// Functions helpers for writing JSON data to WriteBuffer. + void writeFieldCompactDelimiter(WriteBuffer & out); -void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); + void writeObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONFieldCompactDelimiter(WriteBuffer & out); + void writeObjectEnd(WriteBuffer & out, size_t indent = 0); -void writeJSONObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONObjectEnd(WriteBuffer & out, size_t indent = 0); + void writeCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeArrayEnd(WriteBuffer & out, size_t indent = 0); -void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeCompactArrayEnd(WriteBuffer & out); -void writeJSONArrayEnd(WriteBuffer & out, size_t indent = 0); + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name = std::nullopt, + size_t indent = 0); -void writeJSONCompactArrayEnd(WriteBuffer & out); + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent = 0); -void writeJSONFieldFromColumn( - const IColumn & column, - const ISerialization & serialization, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - const std::optional & name = std::nullopt, - size_t indent = 0); + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out); -void writeJSONColumns(const Columns & columns, - const NamesAndTypes & fields, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - size_t indent = 0); + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out); +} -void writeJSONCompactColumns(const Columns & columns, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out); - -void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); - -void writeJSONAdditionalInfo( - size_t rows, - size_t rows_before_limit, - bool applied_limit, - const Stopwatch & watch, - const Progress & progress, - bool write_statistics, - WriteBuffer & out); } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 7630d7bb699..d369eedceea 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -221,12 +221,12 @@ void registerInputFormatJSONAsString(FormatFactory & factory) void registerFileSegmentationEngineJSONAsString(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsString", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONAsStringSchemaReader(FormatFactory & factory) @@ -251,12 +251,12 @@ void registerInputFormatJSONAsObject(FormatFactory & factory) void registerNonTrivialPrefixAndSuffixCheckerJSONAsObject(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsObject", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsObject", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerJSONAsObjectSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp index b2c4a8b5283..0e4e74e14c4 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp @@ -96,7 +96,7 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn( do { - readFieldImpl(*in, column, type, serialization, column_name, format_settings, false); + JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, false); } while (!reader->checkColumnEndOrSkipFieldDelimiter()); @@ -185,7 +185,7 @@ void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const Data { auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); }; chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row); } @@ -260,7 +260,7 @@ DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String & } readJSONField(field, in); - DataTypePtr field_type = getDataTypeFromJSONField(field); + DataTypePtr field_type = JSONUtils::getDataTypeFromField(field); chooseResulType(column_type, field_type, column_name, rows_read); ++rows_read; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp index bd920bd6367..832f65e4463 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp @@ -48,9 +48,9 @@ void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk) void JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last) { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); if (!is_last) - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization) @@ -58,7 +58,7 @@ void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const for (size_t i = 0; i != column.size(); ++i) { if (i != 0) - writeJSONFieldCompactDelimiter(*ostr); + JSONUtils::writeFieldCompactDelimiter(*ostr); serialization.serializeTextJSON(column, i, *ostr, format_settings); } } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp index c018751f1fb..bf8c50b923d 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -20,17 +20,17 @@ JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, c void JSONColumnsBlockOutputFormat::writeChunkStart() { - writeJSONObjectStart(*ostr, indent); + JSONUtils::writeObjectStart(*ostr, indent); } void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index) { - writeJSONCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); + JSONUtils::writeCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); } void JSONColumnsBlockOutputFormat::writeChunkEnd() { - writeJSONObjectEnd(*ostr, indent); + JSONUtils::writeObjectEnd(*ostr, indent); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index 2b41f1d4a4d..1887a10e9f7 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -16,7 +16,7 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm : JSONColumnsBlockOutputFormat(out_, header_, format_settings_, 1) { bool need_validate_utf8 = false; - makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); if (need_validate_utf8) { @@ -27,8 +27,8 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() { - writeJSONObjectStart(*ostr); - writeJSONMetadata(fields, format_settings, *ostr); + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, format_settings, *ostr); } void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() @@ -39,13 +39,13 @@ void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "data"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "data"); } void JSONColumnsWithMetadataBlockOutputFormat::writeChunkEnd() { - writeJSONObjectEnd(*ostr, indent); + JSONUtils::writeObjectEnd(*ostr, indent); } void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) @@ -55,19 +55,19 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); const auto & columns = chunk.getColumns(); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "extremes"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); writeExtremesElement("min", columns, 0); - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); writeExtremesElement("max", columns, 1); - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONColumnsWithMetadataBlockOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONObjectStart(*ostr, 2, title); - writeJSONColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); } void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) @@ -77,10 +77,10 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); const auto & columns = chunk.getColumns(); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "totals"); - writeJSONColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2); - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); + JSONUtils::writeColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() @@ -89,7 +89,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() if (outside_statistics) statistics = std::move(*outside_statistics); - writeJSONAdditionalInfo( + JSONUtils::writeAdditionalInfo( rows, statistics.rows_before_limit, statistics.applied_limit, @@ -98,7 +98,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() format_settings.write_statistics, *ostr); - writeJSONObjectEnd(*ostr); + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); ostr->next(); } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp index f748f619cb5..6f240d5e922 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -13,17 +13,17 @@ JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBu void JSONCompactColumnsBlockOutputFormat::writeChunkStart() { - writeJSONArrayStart(*ostr); + JSONUtils::writeArrayStart(*ostr); } void JSONCompactColumnsBlockOutputFormat::writeColumnStart(size_t) { - writeJSONCompactArrayStart(*ostr, 1); + JSONUtils::writeCompactArrayStart(*ostr, 1); } void JSONCompactColumnsBlockOutputFormat::writeChunkEnd() { - writeJSONArrayEnd(*ostr); + JSONUtils::writeArrayEnd(*ostr); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 140f3fb41b3..ef59fc8f05a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -109,7 +109,7 @@ std::vector JSONCompactEachRowFormatReader::readHeaderRow() bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name) { skipWhitespaceIfAny(*in); - return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings); + return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings); } bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out) @@ -189,7 +189,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( bool allow_bools_as_numbers = format_settings_.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -209,7 +209,7 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() if (in.eof()) return {}; - return readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); + return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); } void registerInputFormatJSONCompactEachRow(FormatFactory & factory) @@ -258,7 +258,7 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) size_t min_rows = 1 + int(with_names) + int(with_types); factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { - return fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); + return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); }); }; diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index fbb4a8d9116..47b79b71ae2 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -21,50 +21,50 @@ JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( void JSONCompactRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); ++field_number; } void JSONCompactRowOutputFormat::writeFieldDelimiter() { - writeJSONFieldCompactDelimiter(*ostr); + JSONUtils::writeFieldCompactDelimiter(*ostr); } void JSONCompactRowOutputFormat::writeRowStartDelimiter() { - writeJSONCompactArrayStart(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 2); } void JSONCompactRowOutputFormat::writeRowEndDelimiter() { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); field_number = 0; ++row_count; } void JSONCompactRowOutputFormat::writeBeforeTotals() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONCompactArrayStart(*ostr, 1, "totals"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 1, "totals"); } void JSONCompactRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); } void JSONCompactRowOutputFormat::writeAfterTotals() { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void JSONCompactRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONCompactArrayStart(*ostr, 2, title); - writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayStart(*ostr, 2, title); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void registerOutputFormatJSONCompact(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e9d6b516feb..76af5bf02c5 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -140,7 +140,7 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns seen_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; - read_columns[index] = readFieldImpl(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); + read_columns[index] = JSONUtils::readField(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); } inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) @@ -313,7 +313,7 @@ JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_str bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -350,7 +350,7 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & return {}; } - return readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); } void registerInputFormatJSONEachRow(FormatFactory & factory) @@ -397,18 +397,18 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONLines", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("NDJSON", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONStringsEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONLines", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("NDJSON", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONEachRowSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index d0d50526a0d..fc2d3cb8133 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -18,7 +18,7 @@ JSONRowOutputFormat::JSONRowOutputFormat( { bool need_validate_utf8 = false; fields = header.getNamesAndTypes(); - makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); if (need_validate_utf8) { @@ -32,34 +32,34 @@ JSONRowOutputFormat::JSONRowOutputFormat( void JSONRowOutputFormat::writePrefix() { - writeJSONObjectStart(*ostr); - writeJSONMetadata(fields, settings, *ostr); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONArrayStart(*ostr, 1, "data"); + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, settings, *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeArrayStart(*ostr, 1, "data"); } void JSONRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); ++field_number; } void JSONRowOutputFormat::writeFieldDelimiter() { - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeRowStartDelimiter() { - writeJSONObjectStart(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2); } void JSONRowOutputFormat::writeRowEndDelimiter() { - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectEnd(*ostr, 2); field_number = 0; ++row_count; } @@ -67,42 +67,42 @@ void JSONRowOutputFormat::writeRowEndDelimiter() void JSONRowOutputFormat::writeRowBetweenDelimiter() { - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeSuffix() { - writeJSONArrayEnd(*ostr, 1); + JSONUtils::writeArrayEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeTotals() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "totals"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); } void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); } void JSONRowOutputFormat::writeAfterTotals() { - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeExtremes() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "extremes"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); } void JSONRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONObjectStart(*ostr, 2, title); - writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); } void JSONRowOutputFormat::writeMinExtreme(const Columns & columns, size_t row_num) @@ -117,7 +117,7 @@ void JSONRowOutputFormat::writeMaxExtreme(const Columns & columns, size_t row_nu void JSONRowOutputFormat::writeAfterExtremes() { - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::finalizeImpl() @@ -126,7 +126,7 @@ void JSONRowOutputFormat::finalizeImpl() if (outside_statistics) statistics = std::move(*outside_statistics); - writeJSONAdditionalInfo( + JSONUtils::writeAdditionalInfo( row_count, statistics.rows_before_limit, statistics.applied_limit, @@ -135,7 +135,7 @@ void JSONRowOutputFormat::finalizeImpl() settings.write_statistics, *ostr); - writeJSONObjectEnd(*ostr); + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); ostr->next(); }