2020-06-28 08:41:56 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/Block.h>
|
|
|
|
#include <Formats/FormatSettings.h>
|
|
|
|
#include <IO/ReadBufferFromString.h>
|
|
|
|
#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/** A stream to input data in tsv format, but without escaping individual values.
|
2020-07-01 03:21:53 +00:00
|
|
|
* It only supports columns without '\n' or '\t'
|
2020-06-28 08:41:56 +00:00
|
|
|
*/
|
|
|
|
class TabSeparatedRawRowInputFormat : public TabSeparatedRowInputFormat
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/** with_names - the first line is the header with the names of the columns
|
|
|
|
* with_types - on the next line header with type names
|
|
|
|
*/
|
|
|
|
TabSeparatedRawRowInputFormat(
|
|
|
|
const Block & header_,
|
|
|
|
ReadBuffer & in_,
|
|
|
|
const Params & params_,
|
|
|
|
bool with_names_,
|
|
|
|
bool with_types_,
|
|
|
|
const FormatSettings & format_settings_)
|
|
|
|
: TabSeparatedRowInputFormat(header_, in_, params_, with_names_, with_types_, format_settings_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override { return "TabSeparatedRawRowInputFormat"; }
|
|
|
|
|
|
|
|
bool readField(IColumn & column, const DataTypePtr & type, bool) override
|
|
|
|
{
|
2020-07-07 09:28:31 +00:00
|
|
|
String tmp;
|
2020-06-28 08:41:56 +00:00
|
|
|
|
2020-07-07 09:28:31 +00:00
|
|
|
while (!in.eof())
|
|
|
|
{
|
|
|
|
char * pos = find_first_symbols<'\n', '\t'>(in.position(), in.buffer().end());
|
|
|
|
|
|
|
|
tmp.append(in.position(), pos - in.position());
|
|
|
|
in.position() = pos;
|
|
|
|
|
|
|
|
if (pos == in.buffer().end())
|
|
|
|
in.next();
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
2020-06-28 08:41:56 +00:00
|
|
|
|
2020-07-07 09:28:31 +00:00
|
|
|
ReadBufferFromString cell(tmp);
|
|
|
|
|
|
|
|
type->deserializeAsWholeText(column, cell, format_settings);
|
2020-06-28 08:41:56 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|