Make better

avogar 2022-05-18 09:25:26 +00:00
parent 8572879c37
commit 12010a81b7
16 changed files with 802 additions and 756 deletions


@@ -403,6 +403,8 @@ Both data output and parsing are supported in this format. For parsing, any orde
Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored.
During input, columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
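For example, with an illustrative table `test(num Int32, str String)`, the unknown field below is skipped instead of causing an error (fields are tab-separated):
```sql
INSERT INTO test SETTINGS input_format_skip_unknown_fields = 1 FORMAT TSKV
num=42	str=hello	unknown_field=1
```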
## CSV {#csv}
Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)).
@@ -462,15 +464,15 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
"meta":
[
{
"name": "'hello'",
"name": "num",
"type": "Int32"
},
{
"name": "str",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"name": "arr",
"type": "Array(UInt8)"
}
],
@@ -478,25 +480,32 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
"data":
[
{
"'hello'": "hello",
"multiply(42, number)": "0",
"range(5)": [0,1,2,3,4]
"num": 42,
"str": "hello",
"arr": [0,1]
},
{
"'hello'": "hello",
"multiply(42, number)": "42",
"range(5)": [0,1,2,3,4]
"num": 43,
"str": "hello",
"arr": [0,1,2]
},
{
"'hello'": "hello",
"multiply(42, number)": "84",
"range(5)": [0,1,2,3,4]
"num": 44,
"str": "hello",
"arr": [0,1,2,3]
}
],
"rows": 3,
"rows_before_limit_at_least": 3
"rows_before_limit_at_least": 3,
"statistics":
{
"elapsed": 0.001137687,
"rows_read": 3,
"bytes_read": 24
}
}
```
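One way (illustrative, not the only one) to produce output of this shape:
```sql
SELECT
    toInt32(number + 42) AS num,
    'hello' AS str,
    CAST(range(number + 2), 'Array(UInt8)') AS arr
FROM numbers(3)
FORMAT JSON
```
The same query with `FORMAT JSONStrings`, `FORMAT JSONCompact`, and so on produces the corresponding examples in the sections below.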
@@ -531,15 +540,15 @@ Example:
"meta":
[
{
"name": "'hello'",
"name": "num",
"type": "Int32"
},
{
"name": "str",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"name": "arr",
"type": "Array(UInt8)"
}
],
@@ -547,100 +556,94 @@ Example:
"data":
[
{
"'hello'": "hello",
"multiply(42, number)": "0",
"range(5)": "[0,1,2,3,4]"
"num": "42",
"str": "hello",
"arr": "[0,1]"
},
{
"'hello'": "hello",
"multiply(42, number)": "42",
"range(5)": "[0,1,2,3,4]"
"num": "43",
"str": "hello",
"arr": "[0,1,2]"
},
{
"'hello'": "hello",
"multiply(42, number)": "84",
"range(5)": "[0,1,2,3,4]"
"num": "44",
"str": "hello",
"arr": "[0,1,2,3]"
}
],
"rows": 3,
"rows_before_limit_at_least": 3
"rows_before_limit_at_least": 3,
"statistics":
{
"elapsed": 0.001403233,
"rows_read": 3,
"bytes_read": 24
}
}
```
## JSONColumns {#jsoncolumns}
In this format, all data is represented as a single JSON Object.
Note that JSONColumns output format buffers all data in memory to output it as a single block.
Note that the JSONColumns output format buffers all data in memory to output it as a single block, which can lead to high memory consumption.
Example:
```json
{
"name1": [1, 2, 3, 4],
"name2": ["Hello", ",", "world", "!"],
"name3": [[1, 2], [3, 4], [5, 6], [7, 8]]
"num": [42, 43, 44],
"str": ["hello", "hello", "hello"],
"arr": [[0,1], [0,1,2], [0,1,2,3]]
}
```
Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here)
Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here).
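A sketch of such an insert (the table `test(num Int32, str String, arr Array(UInt8))` is illustrative): the `unknown` column is skipped and the omitted `str` column is filled with defaults when the corresponding settings are enabled:
```sql
INSERT INTO test
SETTINGS input_format_skip_unknown_fields = 1, input_format_defaults_for_omitted_fields = 1
FORMAT JSONColumns
{
    "num": [42, 43, 44],
    "unknown": ["x", "y", "z"],
    "arr": [[0,1], [0,1,2], [0,1,2,3]]
}
```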
## JSONColumnsWithMetadata {#jsoncolumnsmonoblock}
Differs from JSON output format in that it outputs columns as in JSONColumns format.
This format buffers all data in memory and then outputs them as a single block.
Differs from the JSONColumns output format in that it also outputs some metadata and statistics (similar to the JSON output format).
This format buffers all data in memory and then outputs it as a single block, which can lead to high memory consumption.
Example:
```json
{
"meta":
[
{
"name": "sum",
"type": "UInt64"
},
{
"name": "avg",
"type": "Float64"
}
],
"meta":
[
{
"name": "num",
"type": "Int32"
},
{
"name": "str",
"type": "String"
},
{
"name": "arr",
"type": "Array(UInt8)"
}
],
"data":
{
"sum": ["1", "2", "3", "4"],
"avg": [1, 2, 3, 2]
},
"data":
{
"num": [42, 43, 44],
"str": ["hello", "hello", "hello"],
"arr": [[0,1], [0,1,2], [0,1,2,3]]
},
"totals":
{
"sum": "10",
"avg": 2
},
"rows": 3,
"extremes":
{
"min":
{
"sum": "1",
"avg": 1
},
"max":
{
"sum": "4",
"avg": 3
}
},
"rows_before_limit_at_least": 3,
"rows": 4,
"statistics":
{
"elapsed": 0.003701718,
"rows_read": 5,
"bytes_read": 20
}
"statistics":
{
"elapsed": 0.000272376,
"rows_read": 3,
"bytes_read": 24
}
}
```
@@ -696,87 +699,101 @@ Result:
Differs from JSON only in that data rows are output in arrays, not in objects.
Example:
Examples:
```
// JSONCompact
1) JSONCompact:
```json
{
"meta":
[
{
"name": "'hello'",
"name": "num",
"type": "Int32"
},
{
"name": "str",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"name": "arr",
"type": "Array(UInt8)"
}
],
"data":
[
["hello", "0", [0,1,2,3,4]],
["hello", "42", [0,1,2,3,4]],
["hello", "84", [0,1,2,3,4]]
[42, "hello", [0,1]],
[43, "hello", [0,1,2]],
[44, "hello", [0,1,2,3]]
],
"rows": 3,
"rows_before_limit_at_least": 3
"rows_before_limit_at_least": 3,
"statistics":
{
"elapsed": 0.001222069,
"rows_read": 3,
"bytes_read": 24
}
}
```
```
// JSONCompactStrings
2) JSONCompactStrings:
```json
{
"meta":
[
{
"name": "'hello'",
"name": "num",
"type": "Int32"
},
{
"name": "str",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"name": "arr",
"type": "Array(UInt8)"
}
],
"data":
[
["hello", "0", "[0,1,2,3,4]"],
["hello", "42", "[0,1,2,3,4]"],
["hello", "84", "[0,1,2,3,4]"]
["42", "hello", "[0,1]"],
["43", "hello", "[0,1,2]"],
["44", "hello", "[0,1,2,3]"]
],
"rows": 3,
"rows_before_limit_at_least": 3
"rows_before_limit_at_least": 3,
"statistics":
{
"elapsed": 0.001572097,
"rows_read": 3,
"bytes_read": 24
}
}
```
## JSONCompactColumns {#jsoncompactcolumns}
In this format, all data is represented as a single JSON Array.
Note that JSONCompactColumns output format buffers all data in memory to output it as a single block.
Note that the JSONCompactColumns output format buffers all data in memory to output it as a single block, which can lead to high memory consumption.
Example:
```json
[
[1, 2, 3, 4],
["Hello", ",", "world", "!"],
[[1, 2], [3, 4], [5, 6], [7, 8]]
[42, 43, 44],
["hello", "hello", "hello"],
[[0,1], [0,1,2], [0,1,2,3]]
]
```
Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here)
Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here).
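A sketch, assuming positional matching since this format carries no column names (the table `test(num Int32, str String, arr Array(UInt8))` is illustrative); the trailing `arr` column is absent from the block and is filled with default values:
```sql
INSERT INTO test SETTINGS input_format_defaults_for_omitted_fields = 1 FORMAT JSONCompactColumns
[
    [42, 43, 44],
    ["hello", "hello", "hello"]
]
```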
## JSONEachRow {#jsoneachrow}
## JSONStringsEachRow {#jsonstringseachrow}
@@ -793,15 +810,17 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite
When inserting the data, you should provide a separate JSON value for each row.
In the JSONEachRow/JSONStringsEachRow input formats, columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
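For example (a sketch; the table `test` is illustrative), the extra `unknown` key below is skipped when the setting is enabled:
```sql
INSERT INTO test SETTINGS input_format_skip_unknown_fields = 1 FORMAT JSONEachRow
{"num": 42, "str": "hello", "arr": [0,1], "unknown": 1}
```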
## JSONEachRowWithProgress {#jsoneachrowwithprogress}
## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress}
Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values.
```json
{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}
{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}}
{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}}
{"row":{"num":42,"str":"hello","arr":[0,1]}}
{"row":{"num":43,"str":"hello","arr":[0,1,2]}}
{"row":{"num":44,"str":"hello","arr":[0,1,2,3]}}
{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}}
```
@@ -822,11 +841,11 @@ Differs from `JSONCompactStringsEachRow` in that in that it also prints the head
Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
```json
["'hello'", "multiply(42, number)", "range(5)"]
["String", "UInt64", "Array(UInt8)"]
["hello", "0", [0,1,2,3,4]]
["hello", "42", [0,1,2,3,4]]
["hello", "84", [0,1,2,3,4]]
["num", "str", "arr"]
["Int32", "String", "Array(UInt8)"]
[42, "hello", [0,1]]
[43, "hello", [0,1,2]]
[44, "hello", [0,1,2,3]]
```
### Inserting Data {#inserting-data}


@@ -68,7 +68,8 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx)
void BlockMissingValues::setBits(size_t column_idx, size_t rows)
{
RowsBitMask & mask = rows_mask_by_column_id[column_idx];
mask.resize(rows, true);
mask.resize(rows);
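/// fill() marks every row, not only the elements newly appended by resize().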
std::fill(mask.begin(), mask.end(), true);
}
const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const


@@ -56,7 +56,9 @@ public:
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
/// Check that we have to replace default value at least in one of columns
bool hasDefaultBits(size_t column_idx) const;
/// Set bit for a specified row in a single column.
void setBit(size_t column_idx, size_t row_idx);
/// Set bits for all rows in a single column.
void setBits(size_t column_idx, size_t rows);
bool empty() const { return rows_mask_by_column_id.empty(); }
size_t size() const { return rows_mask_by_column_id.size(); }


@@ -452,7 +452,7 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
return buf.eof() ? type : nullptr;
}
case FormatSettings::EscapingRule::JSON:
return getDataTypeFromJSONField(field);
return JSONUtils::getDataTypeFromField(field);
case FormatSettings::EscapingRule::CSV:
{
if (!format_settings.csv.input_format_use_best_effort_in_schema_inference)

File diff suppressed because it is too large


@@ -13,85 +13,97 @@
namespace DB
{
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size);
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
namespace JSONUtils
{
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size);
std::pair<bool, size_t>
fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
/// Parse JSON from string and convert its type to ClickHouse type. Make the result type always Nullable.
/// JSON array with different nested types is treated as Tuple.
/// If cannot convert (for example when field contains null), return nullptr.
DataTypePtr getDataTypeFromField(const String & field);
/// Parse JSON from string and convert its type to ClickHouse type. Make the result type always Nullable.
/// JSON array with different nested types is treated as Tuple.
/// If cannot convert (for example when field contains null), return nullptr.
DataTypePtr getDataTypeFromJSONField(const String & field);
/// Read row in JSONEachRow format and try to determine type for each field.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
/// Read row in JSONEachRow format and try to determine type for each field.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
/// Read row in JSONCompactEachRow format and try to determine type for each field.
/// If cannot determine the type of some field, return nullptr for it.
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings);
/// Read row in JSONCompactEachRow format and try to determine type for each field.
/// If cannot determine the type of some field, return nullptr for it.
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings);
bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);
bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);
bool readField(
ReadBuffer & in,
IColumn & column,
const DataTypePtr & type,
const SerializationPtr & serialization,
const String & column_name,
const FormatSettings & format_settings,
bool yield_strings);
bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings);
DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers);
DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers);
void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8);
void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8);
/// Functions helpers for writing JSON data to WriteBuffer.
void writeFieldDelimiter(WriteBuffer & out, size_t new_lines = 1);
/// Functions helpers for writing JSON data to WriteBuffer.
void writeFieldCompactDelimiter(WriteBuffer & out);
void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines = 1);
void writeObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeJSONFieldCompactDelimiter(WriteBuffer & out);
void writeObjectEnd(WriteBuffer & out, size_t indent = 0);
void writeJSONObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeJSONObjectEnd(WriteBuffer & out, size_t indent = 0);
void writeCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeJSONArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeArrayEnd(WriteBuffer & out, size_t indent = 0);
void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeCompactArrayEnd(WriteBuffer & out);
void writeJSONArrayEnd(WriteBuffer & out, size_t indent = 0);
void writeFieldFromColumn(
const IColumn & column,
const ISerialization & serialization,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out,
const std::optional<String> & name = std::nullopt,
size_t indent = 0);
void writeJSONCompactArrayEnd(WriteBuffer & out);
void writeColumns(
const Columns & columns,
const NamesAndTypes & fields,
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out,
size_t indent = 0);
void writeJSONFieldFromColumn(
const IColumn & column,
const ISerialization & serialization,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out,
const std::optional<String> & name = std::nullopt,
size_t indent = 0);
void writeCompactColumns(
const Columns & columns,
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out);
void writeJSONColumns(const Columns & columns,
const NamesAndTypes & fields,
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out,
size_t indent = 0);
void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out);
void writeAdditionalInfo(
size_t rows,
size_t rows_before_limit,
bool applied_limit,
const Stopwatch & watch,
const Progress & progress,
bool write_statistics,
WriteBuffer & out);
}
void writeJSONCompactColumns(const Columns & columns,
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out);
void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out);
void writeJSONAdditionalInfo(
size_t rows,
size_t rows_before_limit,
bool applied_limit,
const Stopwatch & watch,
const Progress & progress,
bool write_statistics,
WriteBuffer & out);
}


@@ -221,12 +221,12 @@ void registerInputFormatJSONAsString(FormatFactory & factory)
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory)
{
factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONAsString", &JSONUtils::fileSegmentationEngineJSONEachRow);
}
void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory)
{
factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
}
void registerJSONAsStringSchemaReader(FormatFactory & factory)
@@ -251,12 +251,12 @@ void registerInputFormatJSONAsObject(FormatFactory & factory)
void registerNonTrivialPrefixAndSuffixCheckerJSONAsObject(FormatFactory & factory)
{
factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
}
void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory)
{
factory.registerFileSegmentationEngine("JSONAsObject", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONAsObject", &JSONUtils::fileSegmentationEngineJSONEachRow);
}
void registerJSONAsObjectSchemaReader(FormatFactory & factory)


@@ -96,7 +96,7 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn(
do
{
readFieldImpl(*in, column, type, serialization, column_name, format_settings, false);
JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, false);
}
while (!reader->checkColumnEndOrSkipFieldDelimiter());
@@ -185,7 +185,7 @@ void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const Data
{
auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second)
{
return getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers);
return JSONUtils::getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers);
};
chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row);
}
@@ -260,7 +260,7 @@ DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String &
}
readJSONField(field, in);
DataTypePtr field_type = getDataTypeFromJSONField(field);
DataTypePtr field_type = JSONUtils::getDataTypeFromField(field);
chooseResulType(column_type, field_type, column_name, rows_read);
++rows_read;
}


@@ -48,9 +48,9 @@ void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk)
void JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last)
{
writeJSONCompactArrayEnd(*ostr);
JSONUtils::writeCompactArrayEnd(*ostr);
if (!is_last)
writeJSONFieldDelimiter(*ostr);
JSONUtils::writeFieldDelimiter(*ostr);
}
void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization)
@@ -58,7 +58,7 @@ void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const
for (size_t i = 0; i != column.size(); ++i)
{
if (i != 0)
writeJSONFieldCompactDelimiter(*ostr);
JSONUtils::writeFieldCompactDelimiter(*ostr);
serialization.serializeTextJSON(column, i, *ostr, format_settings);
}
}


@@ -20,17 +20,17 @@ JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, c
void JSONColumnsBlockOutputFormat::writeChunkStart()
{
writeJSONObjectStart(*ostr, indent);
JSONUtils::writeObjectStart(*ostr, indent);
}
void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index)
{
writeJSONCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data());
JSONUtils::writeCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data());
}
void JSONColumnsBlockOutputFormat::writeChunkEnd()
{
writeJSONObjectEnd(*ostr, indent);
JSONUtils::writeObjectEnd(*ostr, indent);
writeChar('\n', *ostr);
}


@@ -16,7 +16,7 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm
: JSONColumnsBlockOutputFormat(out_, header_, format_settings_, 1)
{
bool need_validate_utf8 = false;
makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8);
JSONUtils::makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8);
if (need_validate_utf8)
{
@@ -27,8 +27,8 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm
void JSONColumnsWithMetadataBlockOutputFormat::writePrefix()
{
writeJSONObjectStart(*ostr);
writeJSONMetadata(fields, format_settings, *ostr);
JSONUtils::writeObjectStart(*ostr);
JSONUtils::writeMetadata(fields, format_settings, *ostr);
}
void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix()
@@ -39,13 +39,13 @@ void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix()
void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart()
{
writeJSONFieldDelimiter(*ostr, 2);
writeJSONObjectStart(*ostr, 1, "data");
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 1, "data");
}
void JSONColumnsWithMetadataBlockOutputFormat::writeChunkEnd()
{
writeJSONObjectEnd(*ostr, indent);
JSONUtils::writeObjectEnd(*ostr, indent);
}
void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk)
@@ -55,19 +55,19 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk)
throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR);
const auto & columns = chunk.getColumns();
writeJSONFieldDelimiter(*ostr, 2);
writeJSONObjectStart(*ostr, 1, "extremes");
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 1, "extremes");
writeExtremesElement("min", columns, 0);
writeJSONFieldDelimiter(*ostr);
JSONUtils::writeFieldDelimiter(*ostr);
writeExtremesElement("max", columns, 1);
writeJSONObjectEnd(*ostr, 1);
JSONUtils::writeObjectEnd(*ostr, 1);
}
void JSONColumnsWithMetadataBlockOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num)
{
writeJSONObjectStart(*ostr, 2, title);
writeJSONColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3);
writeJSONObjectEnd(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 2, title);
JSONUtils::writeColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3);
JSONUtils::writeObjectEnd(*ostr, 2);
}
void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk)
@@ -77,10 +77,10 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk)
throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR);
const auto & columns = chunk.getColumns();
writeJSONFieldDelimiter(*ostr, 2);
writeJSONObjectStart(*ostr, 1, "totals");
writeJSONColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2);
writeJSONObjectEnd(*ostr, 1);
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 1, "totals");
JSONUtils::writeColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2);
JSONUtils::writeObjectEnd(*ostr, 1);
}
void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl()
@@ -89,7 +89,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl()
if (outside_statistics)
statistics = std::move(*outside_statistics);
writeJSONAdditionalInfo(
JSONUtils::writeAdditionalInfo(
rows,
statistics.rows_before_limit,
statistics.applied_limit,
@@ -98,7 +98,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl()
format_settings.write_statistics,
*ostr);
writeJSONObjectEnd(*ostr);
JSONUtils::writeObjectEnd(*ostr);
writeChar('\n', *ostr);
ostr->next();
}


@@ -13,17 +13,17 @@ JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBu
void JSONCompactColumnsBlockOutputFormat::writeChunkStart()
{
writeJSONArrayStart(*ostr);
JSONUtils::writeArrayStart(*ostr);
}
void JSONCompactColumnsBlockOutputFormat::writeColumnStart(size_t)
{
writeJSONCompactArrayStart(*ostr, 1);
JSONUtils::writeCompactArrayStart(*ostr, 1);
}
void JSONCompactColumnsBlockOutputFormat::writeChunkEnd()
{
writeJSONArrayEnd(*ostr);
JSONUtils::writeArrayEnd(*ostr);
writeChar('\n', *ostr);
}


@@ -109,7 +109,7 @@ std::vector<String> JSONCompactEachRowFormatReader::readHeaderRow()
bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name)
{
skipWhitespaceIfAny(*in);
return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings);
return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings);
}
bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
@@ -189,7 +189,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(
bool allow_bools_as_numbers = format_settings_.json.read_bools_as_numbers;
setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second)
{
return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
});
}
@@ -209,7 +209,7 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
if (in.eof())
return {};
return readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings());
return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings());
}
void registerInputFormatJSONCompactEachRow(FormatFactory & factory)
@@ -258,7 +258,7 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory)
size_t min_rows = 1 + int(with_names) + int(with_types);
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
{
return fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows);
return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows);
});
};


@@ -21,50 +21,50 @@ JSONCompactRowOutputFormat::JSONCompactRowOutputFormat(
void JSONCompactRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num)
{
writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr);
JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr);
++field_number;
}
void JSONCompactRowOutputFormat::writeFieldDelimiter()
{
writeJSONFieldCompactDelimiter(*ostr);
JSONUtils::writeFieldCompactDelimiter(*ostr);
}
void JSONCompactRowOutputFormat::writeRowStartDelimiter()
{
writeJSONCompactArrayStart(*ostr, 2);
JSONUtils::writeCompactArrayStart(*ostr, 2);
}
void JSONCompactRowOutputFormat::writeRowEndDelimiter()
{
writeJSONCompactArrayEnd(*ostr);
JSONUtils::writeCompactArrayEnd(*ostr);
field_number = 0;
++row_count;
}
void JSONCompactRowOutputFormat::writeBeforeTotals()
{
writeJSONFieldDelimiter(*ostr, 2);
writeJSONCompactArrayStart(*ostr, 1, "totals");
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeCompactArrayStart(*ostr, 1, "totals");
}
void JSONCompactRowOutputFormat::writeTotals(const Columns & columns, size_t row_num)
{
writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
}
void JSONCompactRowOutputFormat::writeAfterTotals()
{
writeJSONCompactArrayEnd(*ostr);
JSONUtils::writeCompactArrayEnd(*ostr);
}
void JSONCompactRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num)
{
writeJSONCompactArrayStart(*ostr, 2, title);
writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
writeJSONCompactArrayEnd(*ostr);
JSONUtils::writeCompactArrayStart(*ostr, 2, title);
JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
JSONUtils::writeCompactArrayEnd(*ostr);
}
void registerOutputFormatJSONCompact(FormatFactory & factory)


@@ -140,7 +140,7 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns
seen_columns[index] = true;
const auto & type = getPort().getHeader().getByPosition(index).type;
const auto & serialization = serializations[index];
read_columns[index] = readFieldImpl(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings);
read_columns[index] = JSONUtils::readField(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings);
}
inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index)
@@ -313,7 +313,7 @@ JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_str
bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers;
setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second)
{
return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
});
}
@@ -350,7 +350,7 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool &
return {};
}
return readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings);
return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings);
}
void registerInputFormatJSONEachRow(FormatFactory & factory)
@@ -397,18 +397,18 @@ void registerInputFormatJSONEachRow(FormatFactory & factory)
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory)
{
factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONLines", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("NDJSON", &fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONStringsEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("JSONLines", &JSONUtils::fileSegmentationEngineJSONEachRow);
factory.registerFileSegmentationEngine("NDJSON", &JSONUtils::fileSegmentationEngineJSONEachRow);
}
void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory)
{
factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
}
void registerJSONEachRowSchemaReader(FormatFactory & factory)


@@ -18,7 +18,7 @@ JSONRowOutputFormat::JSONRowOutputFormat(
{
bool need_validate_utf8 = false;
fields = header.getNamesAndTypes();
makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8);
JSONUtils::makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8);
if (need_validate_utf8)
{
@@ -32,34 +32,34 @@ JSONRowOutputFormat::JSONRowOutputFormat(
void JSONRowOutputFormat::writePrefix()
{
writeJSONObjectStart(*ostr);
writeJSONMetadata(fields, settings, *ostr);
writeJSONFieldDelimiter(*ostr, 2);
writeJSONArrayStart(*ostr, 1, "data");
JSONUtils::writeObjectStart(*ostr);
JSONUtils::writeMetadata(fields, settings, *ostr);
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeArrayStart(*ostr, 1, "data");
}
void JSONRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num)
{
writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3);
JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3);
++field_number;
}
void JSONRowOutputFormat::writeFieldDelimiter()
{
writeJSONFieldDelimiter(*ostr);
JSONUtils::writeFieldDelimiter(*ostr);
}
void JSONRowOutputFormat::writeRowStartDelimiter()
{
writeJSONObjectStart(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 2);
}
void JSONRowOutputFormat::writeRowEndDelimiter()
{
writeJSONObjectEnd(*ostr, 2);
JSONUtils::writeObjectEnd(*ostr, 2);
field_number = 0;
++row_count;
}
@@ -67,42 +67,42 @@ void JSONRowOutputFormat::writeRowEndDelimiter()
void JSONRowOutputFormat::writeRowBetweenDelimiter()
{
writeJSONFieldDelimiter(*ostr);
JSONUtils::writeFieldDelimiter(*ostr);
}
void JSONRowOutputFormat::writeSuffix()
{
writeJSONArrayEnd(*ostr, 1);
JSONUtils::writeArrayEnd(*ostr, 1);
}
void JSONRowOutputFormat::writeBeforeTotals()
{
writeJSONFieldDelimiter(*ostr, 2);
writeJSONObjectStart(*ostr, 1, "totals");
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 1, "totals");
}
void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num)
{
writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2);
JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2);
}
void JSONRowOutputFormat::writeAfterTotals()
{
writeJSONObjectEnd(*ostr, 1);
JSONUtils::writeObjectEnd(*ostr, 1);
}
void JSONRowOutputFormat::writeBeforeExtremes()
{
writeJSONFieldDelimiter(*ostr, 2);
writeJSONObjectStart(*ostr, 1, "extremes");
JSONUtils::writeFieldDelimiter(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 1, "extremes");
}
void JSONRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num)
{
writeJSONObjectStart(*ostr, 2, title);
writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3);
writeJSONObjectEnd(*ostr, 2);
JSONUtils::writeObjectStart(*ostr, 2, title);
JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3);
JSONUtils::writeObjectEnd(*ostr, 2);
}
void JSONRowOutputFormat::writeMinExtreme(const Columns & columns, size_t row_num)
@@ -117,7 +117,7 @@ void JSONRowOutputFormat::writeMaxExtreme(const Columns & columns, size_t row_nu
void JSONRowOutputFormat::writeAfterExtremes()
{
writeJSONObjectEnd(*ostr, 1);
JSONUtils::writeObjectEnd(*ostr, 1);
}
void JSONRowOutputFormat::finalizeImpl()
@@ -126,7 +126,7 @@ void JSONRowOutputFormat::finalizeImpl()
if (outside_statistics)
statistics = std::move(*outside_statistics);
writeJSONAdditionalInfo(
JSONUtils::writeAdditionalInfo(
row_count,
statistics.rows_before_limit,
statistics.applied_limit,
@@ -135,7 +135,7 @@ void JSONRowOutputFormat::finalizeImpl()
settings.write_statistics,
*ostr);
writeJSONObjectEnd(*ostr);
JSONUtils::writeObjectEnd(*ostr);
writeChar('\n', *ostr);
ostr->next();
}