mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 21:42:39 +00:00
Add setting to obtain object name as column value in JSONObjectEachRow format
This commit is contained in:
parent
2c83abaaba
commit
d3d06251a3
@ -772,6 +772,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
|||||||
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
|
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
|
||||||
M(Bool, output_format_json_validate_utf8, false, "Validate UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8", 0) \
|
M(Bool, output_format_json_validate_utf8, false, "Validate UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8", 0) \
|
||||||
\
|
\
|
||||||
|
M(String, format_json_object_each_row_column_for_object_name, "", "The name of column that will be used as object names in JSONObjectEachRow format. Column type should be String", 0) \
|
||||||
|
\
|
||||||
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
|
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
|
||||||
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
|
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
|
||||||
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
|
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
|
||||||
|
@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
|||||||
format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings;
|
format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings;
|
||||||
format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata;
|
format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata;
|
||||||
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
|
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
|
||||||
|
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name;
|
||||||
format_settings.null_as_default = settings.input_format_null_as_default;
|
format_settings.null_as_default = settings.input_format_null_as_default;
|
||||||
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
||||||
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
|
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
|
||||||
|
@ -155,6 +155,11 @@ struct FormatSettings
|
|||||||
bool validate_utf8 = false;
|
bool validate_utf8 = false;
|
||||||
} json;
|
} json;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
String column_for_object_name;
|
||||||
|
} json_object_each_row;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
UInt64 row_group_size = 1000000;
|
UInt64 row_group_size = 1000000;
|
||||||
|
@ -214,7 +214,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
|
|||||||
seen_columns.assign(num_columns, false);
|
seen_columns.assign(num_columns, false);
|
||||||
|
|
||||||
nested_prefix_length = 0;
|
nested_prefix_length = 0;
|
||||||
readRowStart();
|
readRowStart(columns);
|
||||||
readJSONObject(columns);
|
readJSONObject(columns);
|
||||||
|
|
||||||
const auto & header = getPort().getHeader();
|
const auto & header = getPort().getHeader();
|
||||||
|
@ -48,7 +48,7 @@ private:
|
|||||||
void readJSONObject(MutableColumns & columns);
|
void readJSONObject(MutableColumns & columns);
|
||||||
void readNestedData(const String & name, MutableColumns & columns);
|
void readNestedData(const String & name, MutableColumns & columns);
|
||||||
|
|
||||||
virtual void readRowStart() {}
|
virtual void readRowStart(MutableColumns &) {}
|
||||||
virtual bool checkEndOfData(bool is_first_row);
|
virtual bool checkEndOfData(bool is_first_row);
|
||||||
|
|
||||||
const FormatSettings format_settings;
|
const FormatSettings format_settings;
|
||||||
@ -66,10 +66,6 @@ private:
|
|||||||
/// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.'
|
/// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.'
|
||||||
size_t nested_prefix_length = 0;
|
size_t nested_prefix_length = 0;
|
||||||
|
|
||||||
/// Set of columns for which the values were read. The rest will be filled with default values.
|
|
||||||
std::vector<UInt8> read_columns;
|
|
||||||
/// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
|
|
||||||
std::vector<UInt8> seen_columns;
|
|
||||||
/// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
|
/// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
|
||||||
/// for row like {..., "non-nullable column name" : null, ...}
|
/// for row like {..., "non-nullable column name" : null, ...}
|
||||||
|
|
||||||
@ -85,6 +81,12 @@ private:
|
|||||||
bool yield_strings;
|
bool yield_strings;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
/// Set of columns for which the values were read. The rest will be filled with default values.
|
||||||
|
std::vector<UInt8> read_columns;
|
||||||
|
/// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
|
||||||
|
std::vector<UInt8> seen_columns;
|
||||||
|
|
||||||
/// This flag is needed to know if data is in square brackets.
|
/// This flag is needed to know if data is in square brackets.
|
||||||
bool data_in_square_brackets = false;
|
bool data_in_square_brackets = false;
|
||||||
};
|
};
|
||||||
|
@ -2,12 +2,39 @@
|
|||||||
#include <Formats/JSONUtils.h>
|
#include <Formats/JSONUtils.h>
|
||||||
#include <Formats/FormatFactory.h>
|
#include <Formats/FormatFactory.h>
|
||||||
#include <Formats/EscapingRuleUtils.h>
|
#include <Formats/EscapingRuleUtils.h>
|
||||||
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & format_settings)
|
||||||
|
{
|
||||||
|
if (format_settings.json_object_each_row.column_for_object_name.empty())
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
if (!header.has(format_settings.json_object_each_row.column_for_object_name))
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"Column name '{}' from setting format_json_object_each_row_column_for_object_name doesn't exists in header",
|
||||||
|
format_settings.json_object_each_row.column_for_object_name);
|
||||||
|
|
||||||
|
size_t index = header.getPositionByName(format_settings.json_object_each_row.column_for_object_name);
|
||||||
|
if (!isStringOrFixedString(header.getDataTypes()[index]))
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"Column '{}' from setting json_object_each_row_column_for_object_name must have String type",
|
||||||
|
format_settings.json_object_each_row.column_for_object_name);
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
JSONObjectEachRowInputFormat::JSONObjectEachRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
|
JSONObjectEachRowInputFormat::JSONObjectEachRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
|
||||||
: JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false)
|
: JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, format_settings_))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -16,9 +43,15 @@ void JSONObjectEachRowInputFormat::readPrefix()
|
|||||||
JSONUtils::skipObjectStart(*in);
|
JSONUtils::skipObjectStart(*in);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JSONObjectEachRowInputFormat::readRowStart()
|
void JSONObjectEachRowInputFormat::readRowStart(MutableColumns & columns)
|
||||||
{
|
{
|
||||||
JSONUtils::readFieldName(*in);
|
auto object_name = JSONUtils::readFieldName(*in);
|
||||||
|
if (field_index_for_object_name)
|
||||||
|
{
|
||||||
|
columns[*field_index_for_object_name]->insertData(object_name.data(), object_name.size());
|
||||||
|
seen_columns[*field_index_for_object_name] = true;
|
||||||
|
read_columns[*field_index_for_object_name] = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row)
|
bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row)
|
||||||
@ -30,7 +63,6 @@ bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
JSONObjectEachRowSchemaReader::JSONObjectEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
JSONObjectEachRowSchemaReader::JSONObjectEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
||||||
: IRowWithNamesSchemaReader(in_, format_settings_)
|
: IRowWithNamesSchemaReader(in_, format_settings_)
|
||||||
{
|
{
|
||||||
@ -53,7 +85,10 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes(
|
|||||||
JSONUtils::skipComma(in);
|
JSONUtils::skipComma(in);
|
||||||
|
|
||||||
JSONUtils::readFieldName(in);
|
JSONUtils::readFieldName(in);
|
||||||
return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false);
|
auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false);
|
||||||
|
if (!format_settings.json_object_each_row.column_for_object_name.empty())
|
||||||
|
names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared<DataTypeString>());
|
||||||
|
return names_and_types;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
|
void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
|
||||||
|
@ -27,8 +27,10 @@ public:
|
|||||||
private:
|
private:
|
||||||
void readPrefix() override;
|
void readPrefix() override;
|
||||||
void readSuffix() override {}
|
void readSuffix() override {}
|
||||||
void readRowStart() override;
|
void readRowStart(MutableColumns & columns) override;
|
||||||
bool checkEndOfData(bool is_first_row) override;
|
bool checkEndOfData(bool is_first_row) override;
|
||||||
|
|
||||||
|
std::optional<size_t> field_index_for_object_name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -44,4 +46,6 @@ private:
|
|||||||
bool first_row = true;
|
bool first_row = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::optional<size_t> getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h>
|
#include <Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h>
|
||||||
|
#include <Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h>
|
||||||
#include <Formats/JSONUtils.h>
|
#include <Formats/JSONUtils.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
@ -6,10 +7,38 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_)
|
JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_)
|
||||||
: JSONEachRowRowOutputFormat(out_, header_, params_, settings_)
|
: JSONEachRowRowOutputFormat(out_, header_, params_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, settings_))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JSONObjectEachRowRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row)
|
||||||
|
{
|
||||||
|
if (field_number == field_index_for_object_name)
|
||||||
|
{
|
||||||
|
++field_number;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
JSONEachRowRowOutputFormat::writeField(column, serialization, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
void JSONObjectEachRowRowOutputFormat::write(const Columns & columns, size_t row)
|
||||||
|
{
|
||||||
|
if (field_index_for_object_name)
|
||||||
|
object_name = columns[*field_index_for_object_name]->getDataAt(row).toString();
|
||||||
|
else
|
||||||
|
object_name = "row_" + std::to_string(row + 1);
|
||||||
|
|
||||||
|
IRowOutputFormat::write(columns, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
void JSONObjectEachRowRowOutputFormat::writeFieldDelimiter()
|
||||||
|
{
|
||||||
|
/// We should not write comma before column that is used for
|
||||||
|
/// object name and also after it if it's in the first place
|
||||||
|
if (field_number != field_index_for_object_name && !(field_index_for_object_name == 0 && field_number == 1))
|
||||||
|
JSONEachRowRowOutputFormat::writeFieldDelimiter();
|
||||||
|
}
|
||||||
|
|
||||||
void JSONObjectEachRowRowOutputFormat::writePrefix()
|
void JSONObjectEachRowRowOutputFormat::writePrefix()
|
||||||
{
|
{
|
||||||
JSONUtils::writeObjectStart(*ostr);
|
JSONUtils::writeObjectStart(*ostr);
|
||||||
@ -17,9 +46,7 @@ void JSONObjectEachRowRowOutputFormat::writePrefix()
|
|||||||
|
|
||||||
void JSONObjectEachRowRowOutputFormat::writeRowStartDelimiter()
|
void JSONObjectEachRowRowOutputFormat::writeRowStartDelimiter()
|
||||||
{
|
{
|
||||||
++row_num;
|
JSONUtils::writeCompactObjectStart(*ostr, 1, object_name.c_str());
|
||||||
String title = "row_" + std::to_string(row_num);
|
|
||||||
JSONUtils::writeCompactObjectStart(*ostr, 1, title.c_str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JSONObjectEachRowRowOutputFormat::writeRowEndDelimiter()
|
void JSONObjectEachRowRowOutputFormat::writeRowEndDelimiter()
|
||||||
|
@ -29,6 +29,9 @@ public:
|
|||||||
String getName() const override { return "JSONObjectEachRowRowOutputFormat"; }
|
String getName() const override { return "JSONObjectEachRowRowOutputFormat"; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void write(const Columns & columns, size_t row) override;
|
||||||
|
void writeField(const IColumn & column, const ISerialization & serialization, size_t row) override;
|
||||||
|
void writeFieldDelimiter() override;
|
||||||
void writeRowStartDelimiter() override;
|
void writeRowStartDelimiter() override;
|
||||||
void writeRowEndDelimiter() override;
|
void writeRowEndDelimiter() override;
|
||||||
void writeRowBetweenDelimiter() override;
|
void writeRowBetweenDelimiter() override;
|
||||||
@ -36,7 +39,8 @@ private:
|
|||||||
void writePrefix() override;
|
void writePrefix() override;
|
||||||
void writeSuffix() override;
|
void writeSuffix() override;
|
||||||
|
|
||||||
size_t row_num = 0;
|
std::optional<size_t> field_index_for_object_name;
|
||||||
|
String object_name;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"name_0": {"number":"0"},
|
||||||
|
"name_1": {"number":"1"},
|
||||||
|
"name_2": {"number":"2"}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"name_0": {"number":"0","x":"1"},
|
||||||
|
"name_1": {"number":"1","x":"2"},
|
||||||
|
"name_2": {"number":"2","x":"3"}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"name_0": {"number":"0"},
|
||||||
|
"name_1": {"number":"1"},
|
||||||
|
"name_2": {"number":"2"}
|
||||||
|
}
|
||||||
|
name String
|
||||||
|
number Nullable(Int64)
|
||||||
|
name_0 0
|
||||||
|
name_1 1
|
||||||
|
name_2 2
|
@ -0,0 +1,11 @@
|
|||||||
|
-- Tags: no-fasttest, no-parallel
|
||||||
|
set format_json_object_each_row_column_for_object_name='name';
|
||||||
|
|
||||||
|
select number, concat('name_', toString(number)) as name from numbers(3) format JSONObjectEachRow;
|
||||||
|
select number, concat('name_', toString(number)) as name, number + 1 as x from numbers(3) format JSONObjectEachRow;
|
||||||
|
select concat('name_', toString(number)) as name, number from numbers(3) format JSONObjectEachRow;
|
||||||
|
|
||||||
|
insert into function file(02454_data.jsonobjecteachrow) select number, concat('name_', toString(number)) as name from numbers(3) settings engine_file_truncate_on_insert=1;
|
||||||
|
desc file(02454_data.jsonobjecteachrow);
|
||||||
|
select * from file(02454_data.jsonobjecteachrow);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user