mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Add TabSeparatedRawRowInputFormat
This commit is contained in:
parent
1b1c32fe89
commit
0a8a29272b
58
src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h
Normal file
58
src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h
Normal file
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** A stream to input data in tsv format, but without escaping individual values.
|
||||
* It only supports one string column
|
||||
*/
|
||||
class TabSeparatedRawRowInputFormat : public TabSeparatedRowInputFormat
|
||||
{
|
||||
public:
|
||||
/** with_names - the first line is the header with the names of the columns
|
||||
* with_types - on the next line header with type names
|
||||
*/
|
||||
TabSeparatedRawRowInputFormat(
|
||||
const Block & header_,
|
||||
ReadBuffer & in_,
|
||||
const Params & params_,
|
||||
bool with_names_,
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_)
|
||||
: TabSeparatedRowInputFormat(header_, in_, params_, with_names_, with_types_, format_settings_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "TabSeparatedRawRowInputFormat"; }
|
||||
|
||||
bool readField(IColumn & column, const DataTypePtr & type, bool) override
|
||||
{
|
||||
// TODO: possible to optimize
|
||||
std::string buf;
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
char c = *in.position();
|
||||
|
||||
if (c == '\n' || c == '\t')
|
||||
break;
|
||||
|
||||
in.ignore();
|
||||
buf.push_back(c);
|
||||
}
|
||||
|
||||
ReadBufferFromString line_in(buf);
|
||||
|
||||
type->deserializeAsWholeText(column, line_in, format_settings);
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -3,6 +3,7 @@
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
|
||||
#include <Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h>
|
||||
#include <Formats/verbosePrintString.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
@ -360,6 +361,18 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
|
||||
});
|
||||
}
|
||||
|
||||
/// Register the raw (unescaped) TSV input format under both of its names.
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
{
    /// The raw format is created without a names header and without a types header.
    auto create_raw_format = [](
        ReadBuffer & buf,
        const Block & sample,
        IRowInputFormat::Params params,
        const FormatSettings & settings)
    {
        constexpr bool with_names = false;
        constexpr bool with_types = false;
        return std::make_shared<TabSeparatedRawRowInputFormat>(sample, buf, params, with_names, with_types, settings);
    };

    factory.registerInputFormatProcessor(name, create_raw_format);
}
|
||||
|
||||
for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
|
||||
{
|
||||
factory.registerInputFormatProcessor(name, [](
|
||||
|
@ -28,10 +28,14 @@ public:
|
||||
|
||||
void resetParser() override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
bool with_names;
|
||||
bool with_types;
|
||||
const FormatSettings format_settings;
|
||||
|
||||
/// Reads a single field of the given `type` into `column`.
/// Declared virtual in the protected section so that derived formats can replace the
/// field parsing; TabSeparatedRawRowInputFormat overrides it to read values without unescaping.
virtual bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
|
||||
|
||||
private:
|
||||
DataTypes data_types;
|
||||
|
||||
using IndexesMap = std::unordered_map<String, size_t>;
|
||||
@ -43,8 +47,6 @@ private:
|
||||
std::vector<UInt8> read_columns;
|
||||
std::vector<size_t> columns_to_fill_with_default_values;
|
||||
|
||||
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
|
||||
|
||||
void addInputColumn(const String & column_name);
|
||||
void setupAllColumnsByTableSchema();
|
||||
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension);
|
||||
|
Loading…
Reference in New Issue
Block a user