Make better

This commit is contained in:
avogar 2022-05-20 12:07:29 +00:00
parent a6a430c5ee
commit d2304f5d15
14 changed files with 64 additions and 63 deletions

View File

@ -403,7 +403,7 @@ Both data output and parsing are supported in this format. For parsing, any orde
Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored.
For input format columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
During import, columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
## CSV {#csv}
@ -599,7 +599,7 @@ Example:
}
```
Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
During import, columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here).
@ -621,6 +621,7 @@ Example:
"name": "str",
"type": "String"
},
{
"name": "arr",
"type": "Array(UInt8)"

View File

@ -5,7 +5,7 @@
namespace DB
{
JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_)
JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_)
{
}
@ -51,7 +51,7 @@ void registerInputFormatJSONColumns(FormatFactory & factory)
const RowInputFormatParams &,
const FormatSettings & settings)
{
return std::make_shared<JSONColumnsBaseBlockInputFormat>(buf, sample, settings, std::make_unique<JSONColumnsReader>(buf));
return std::make_shared<JSONColumnsBlockInputFormatBase>(buf, sample, settings, std::make_unique<JSONColumnsReader>(buf));
}
);
}
@ -62,7 +62,7 @@ void registerJSONColumnsSchemaReader(FormatFactory & factory)
"JSONColumns",
[](ReadBuffer & buf, const FormatSettings & settings)
{
return std::make_shared<JSONColumnsBaseSchemaReader>(buf, settings, std::make_unique<JSONColumnsReader>(buf));
return std::make_shared<JSONColumnsSchemaReaderBase>(buf, settings, std::make_unique<JSONColumnsReader>(buf));
}
);
}

View File

@ -1,6 +1,6 @@
#pragma once
#include <Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h>
namespace DB
{
@ -12,7 +12,7 @@ namespace DB
* ...
* }
*/
class JSONColumnsReader : public JSONColumnsBaseReader
class JSONColumnsReader : public JSONColumnsReaderBase
{
public:
JSONColumnsReader(ReadBuffer & in_);

View File

@ -1,4 +1,4 @@
#include <Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h>
#include <Formats/JSONUtils.h>
#include <IO/ReadHelpers.h>
#include <base/find_symbols.h>
@ -13,11 +13,11 @@ namespace ErrorCodes
}
JSONColumnsBaseReader::JSONColumnsBaseReader(ReadBuffer & in_) : in(&in_)
JSONColumnsReaderBase::JSONColumnsReaderBase(ReadBuffer & in_) : in(&in_)
{
}
bool JSONColumnsBaseReader::checkColumnEnd()
bool JSONColumnsReaderBase::checkColumnEnd()
{
skipWhitespaceIfAny(*in);
if (!in->eof() && *in->position() == ']')
@ -29,7 +29,7 @@ bool JSONColumnsBaseReader::checkColumnEnd()
return false;
}
bool JSONColumnsBaseReader::checkColumnEndOrSkipFieldDelimiter()
bool JSONColumnsReaderBase::checkColumnEndOrSkipFieldDelimiter()
{
if (checkColumnEnd())
return true;
@ -39,7 +39,7 @@ bool JSONColumnsBaseReader::checkColumnEndOrSkipFieldDelimiter()
return false;
}
bool JSONColumnsBaseReader::checkChunkEndOrSkipColumnDelimiter()
bool JSONColumnsReaderBase::checkChunkEndOrSkipColumnDelimiter()
{
if (checkChunkEnd())
return true;
@ -49,7 +49,7 @@ bool JSONColumnsBaseReader::checkChunkEndOrSkipColumnDelimiter()
return false;
}
void JSONColumnsBaseReader::skipColumn()
void JSONColumnsReaderBase::skipColumn()
{
/// We assume that we already read '[', so we should skip until matched ']'.
size_t balance = 1;
@ -76,8 +76,8 @@ void JSONColumnsBaseReader::skipColumn()
}
}
JSONColumnsBaseBlockInputFormat::JSONColumnsBaseBlockInputFormat(
ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsBaseReader> reader_)
JSONColumnsBlockInputFormatBase::JSONColumnsBlockInputFormatBase(
ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_)
: IInputFormat(header_, in_)
, format_settings(format_settings_)
, fields(header_.getNamesAndTypes())
@ -87,7 +87,7 @@ JSONColumnsBaseBlockInputFormat::JSONColumnsBaseBlockInputFormat(
{
}
size_t JSONColumnsBaseBlockInputFormat::readColumn(
size_t JSONColumnsBlockInputFormatBase::readColumn(
IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name)
{
/// Check for empty column.
@ -103,13 +103,13 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn(
return column.size();
}
void JSONColumnsBaseBlockInputFormat::setReadBuffer(ReadBuffer & in_)
void JSONColumnsBlockInputFormatBase::setReadBuffer(ReadBuffer & in_)
{
reader->setReadBuffer(in_);
IInputFormat::setReadBuffer(in_);
}
Chunk JSONColumnsBaseBlockInputFormat::generate()
Chunk JSONColumnsBlockInputFormatBase::generate()
{
MutableColumns columns = getPort().getHeader().cloneEmptyColumns();
block_missing_values.clear();
@ -175,13 +175,13 @@ Chunk JSONColumnsBaseBlockInputFormat::generate()
return Chunk(std::move(columns), rows);
}
JSONColumnsBaseSchemaReader::JSONColumnsBaseSchemaReader(
ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsBaseReader> reader_)
JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase(
ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_)
: ISchemaReader(in_), format_settings(format_settings_), reader(std::move(reader_))
{
}
void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const
void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const
{
auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second)
{
@ -190,7 +190,7 @@ void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const Data
chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row);
}
NamesAndTypesList JSONColumnsBaseSchemaReader::readSchema()
NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
{
size_t total_rows_read = 0;
std::unordered_map<String, DataTypePtr> names_to_types;
@ -242,7 +242,7 @@ NamesAndTypesList JSONColumnsBaseSchemaReader::readSchema()
return result;
}
DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read)
DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read)
{
/// Check for empty column.
if (reader->checkColumnEnd())

View File

@ -12,12 +12,12 @@ class ReadBuffer;
/// Base class for reading data in Columnar JSON formats.
class JSONColumnsBaseReader
class JSONColumnsReaderBase
{
public:
JSONColumnsBaseReader(ReadBuffer & in_);
JSONColumnsReaderBase(ReadBuffer & in_);
virtual ~JSONColumnsBaseReader() = default;
virtual ~JSONColumnsReaderBase() = default;
void setReadBuffer(ReadBuffer & in_) { in = &in_; }
@ -38,15 +38,15 @@ protected:
/// Base class for Columnar JSON input formats. It works with data using
/// JSONColumnsBaseReader interface.
/// To implement new Columnar JSON format you need to implement new JSONColumnsBaseReader
/// interface and provide it to JSONColumnsBaseBlockInputFormat.
class JSONColumnsBaseBlockInputFormat : public IInputFormat
/// the JSONColumnsReaderBase interface.
/// To implement a new Columnar JSON format you need to implement a new JSONColumnsReaderBase
/// interface and provide it to JSONColumnsBlockInputFormatBase.
class JSONColumnsBlockInputFormatBase : public IInputFormat
{
public:
JSONColumnsBaseBlockInputFormat(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsBaseReader> reader_);
JSONColumnsBlockInputFormatBase(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_);
String getName() const override { return "JSONColumnsBaseBlockInputFormat"; }
String getName() const override { return "JSONColumnsBlockInputFormatBase"; }
void setReadBuffer(ReadBuffer & in_) override;
@ -62,19 +62,19 @@ protected:
/// Maps column names and their positions in header.
std::unordered_map<String, size_t> name_to_index;
Serializations serializations;
std::unique_ptr<JSONColumnsBaseReader> reader;
std::unique_ptr<JSONColumnsReaderBase> reader;
BlockMissingValues block_missing_values;
};
/// Base class for schema inference from Columnar JSON input formats. It works with data using
/// JSONColumnsBaseReader interface.
/// To implement schema reader for the new Columnar JSON format you need to implement new JSONColumnsBaseReader
/// interface and provide it to JSONColumnsBaseSchemaReader.
class JSONColumnsBaseSchemaReader : public ISchemaReader
/// the JSONColumnsReaderBase interface.
/// To implement a schema reader for a new Columnar JSON format you need to implement a new JSONColumnsReaderBase
/// interface and provide it to JSONColumnsSchemaReaderBase.
class JSONColumnsSchemaReaderBase : public ISchemaReader
{
public:
JSONColumnsBaseSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsBaseReader> reader_);
JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_);
private:
NamesAndTypesList readSchema() override;
@ -86,7 +86,7 @@ private:
void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const;
const FormatSettings format_settings;
std::unique_ptr<JSONColumnsBaseReader> reader;
std::unique_ptr<JSONColumnsReaderBase> reader;
};
}

View File

@ -8,7 +8,7 @@ namespace DB
{
JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_)
: JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_)
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_)
{
for (auto & field : fields)
{

View File

@ -1,6 +1,6 @@
#pragma once
#include <Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h>
namespace DB
{
@ -12,7 +12,7 @@ namespace DB
* ...
* }
*/
class JSONColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat
class JSONColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase
{
public:
JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_ = 0);

View File

@ -1,4 +1,4 @@
#include <Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h>
#include <IO/WriteHelpers.h>
#include <Formats/JSONUtils.h>
@ -6,7 +6,7 @@
namespace DB
{
JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat(
JSONColumnsBlockOutputFormatBase::JSONColumnsBlockOutputFormatBase(
WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_)
, format_settings(format_settings_)
@ -15,7 +15,7 @@ JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat(
{
}
void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk)
void JSONColumnsBlockOutputFormatBase::consume(Chunk chunk)
{
if (!mono_chunk)
{
@ -26,14 +26,14 @@ void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk)
mono_chunk.append(chunk);
}
void JSONColumnsBaseBlockOutputFormat::writeSuffix()
void JSONColumnsBlockOutputFormatBase::writeSuffix()
{
writeChunk(mono_chunk);
mono_chunk.clear();
}
void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk)
void JSONColumnsBlockOutputFormatBase::writeChunk(Chunk & chunk)
{
writeChunkStart();
const auto & columns = chunk.getColumns();
@ -46,14 +46,14 @@ void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk)
writeChunkEnd();
}
void JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last)
void JSONColumnsBlockOutputFormatBase::writeColumnEnd(bool is_last)
{
JSONUtils::writeCompactArrayEnd(*ostr);
if (!is_last)
JSONUtils::writeFieldDelimiter(*ostr);
}
void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization)
void JSONColumnsBlockOutputFormatBase::writeColumn(const IColumn & column, const ISerialization & serialization)
{
for (size_t i = 0; i != column.size(); ++i)
{

View File

@ -12,12 +12,12 @@ class WriteBuffer;
/// Base class for Columnar JSON output formats.
/// It buffers all data and outputs it as a single block in the writeSuffix() method.
class JSONColumnsBaseBlockOutputFormat : public IOutputFormat
class JSONColumnsBlockOutputFormatBase : public IOutputFormat
{
public:
JSONColumnsBaseBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_);
JSONColumnsBlockOutputFormatBase(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_);
String getName() const override { return "JSONColumnsBaseBlockOutputFormat"; }
String getName() const override { return "JSONColumnsBlockOutputFormatBase"; }
protected:
void consume(Chunk chunk) override;
@ -32,7 +32,7 @@ protected:
void writeColumnEnd(bool is_last);
const FormatSettings format_settings;
Serializations serializations;
const Serializations serializations;
WriteBuffer * ostr;

View File

@ -34,7 +34,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::writePrefix()
void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix()
{
rows = mono_chunk.getNumRows();
JSONColumnsBaseBlockOutputFormat::writeSuffix();
JSONColumnsBlockOutputFormatBase::writeSuffix();
}
void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart()

View File

@ -5,7 +5,7 @@
namespace DB
{
JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_)
JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_)
{
}
@ -46,7 +46,7 @@ void registerInputFormatJSONCompactColumns(FormatFactory & factory)
const RowInputFormatParams &,
const FormatSettings & settings)
{
return std::make_shared<JSONColumnsBaseBlockInputFormat>(buf, sample, settings, std::make_unique<JSONCompactColumnsReader>(buf));
return std::make_shared<JSONColumnsBlockInputFormatBase>(buf, sample, settings, std::make_unique<JSONCompactColumnsReader>(buf));
}
);
}
@ -57,7 +57,7 @@ void registerJSONCompactColumnsSchemaReader(FormatFactory & factory)
"JSONCompactColumns",
[](ReadBuffer & buf, const FormatSettings & settings)
{
return std::make_shared<JSONColumnsBaseSchemaReader>(buf, settings, std::make_unique<JSONCompactColumnsReader>(buf));
return std::make_shared<JSONColumnsSchemaReaderBase>(buf, settings, std::make_unique<JSONCompactColumnsReader>(buf));
}
);
}

View File

@ -1,6 +1,6 @@
#pragma once
#include <Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h>
namespace DB
{
@ -12,7 +12,7 @@ namespace DB
* ...
* ]
*/
class JSONCompactColumnsReader : public JSONColumnsBaseReader
class JSONCompactColumnsReader : public JSONColumnsReaderBase
{
public:
JSONCompactColumnsReader(ReadBuffer & in_);

View File

@ -7,7 +7,7 @@ namespace DB
{
JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), column_names(header_.getNames())
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), column_names(header_.getNames())
{
}

View File

@ -1,5 +1,5 @@
#pragma once
#include <Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h>
#include <Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h>
namespace DB
{
@ -11,7 +11,7 @@ namespace DB
* ...
* ]
*/
class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat
class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase
{
public:
JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_);
@ -24,7 +24,7 @@ protected:
void writeColumnStart(size_t column_index) override;
Names column_names;
const Names column_names;
};
}