2020-08-17 10:20:23 +00:00
|
|
|
#include <Processors/Formats/Impl/LineAsStringRowInputFormat.h>
|
|
|
|
#include <Formats/JSONEachRowUtils.h>
|
|
|
|
#include <common/find_symbols.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced in this file. The constant is only declared here (extern).
namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}
|
|
|
|
|
|
|
|
/// Constructs the LineAsString row input format.
///
/// header_  - block describing the destination columns; it must contain exactly one column of type String.
/// in_      - buffer the lines are read from.
/// params_  - row input parameters, forwarded to IRowInputFormat.
///
/// Throws Exception with ErrorCodes::INCORRECT_QUERY if the header is not a single String column.
LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
    IRowInputFormat(header_, in_, std::move(params_))
{
    /// The column count is compared with `!= 1` (not `> 1`) so that a header without columns is rejected
    /// by the first operand and element 0 of the data types is never accessed on an empty header.
    if (header_.columns() != 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
    {
        throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY);
    }
}
|
|
|
|
|
|
|
|
void LineAsStringRowInputFormat::resetParser()
|
|
|
|
{
|
|
|
|
IRowInputFormat::resetParser();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Reads one line from `in` and appends it to `column` without the terminating '\n'.
///
/// The bytes of the line are accumulated in a temporary Memory object with the help of
/// loadAtPosition / saveUpToPosition, so that a line is collected even when the '\n' is not
/// found in the currently available part of the read buffer.
///
/// If the input ends before any '\n' is found (last line without a trailing newline),
/// everything that was read is inserted as the line, nothing is cut off.
void LineAsStringRowInputFormat::readLineObject(IColumn & column)
{
    DB::Memory<> object;

    char * pos = in.position();
    bool newline_found = false;

    /// loadAtPosition is intentionally evaluated first on every iteration, also after the newline was found.
    while (loadAtPosition(in, object, pos) && !newline_found)
    {
        pos = find_first_symbols<'\n'>(pos, in.buffer().end());
        if (pos == in.buffer().end())
            continue;   /// No newline in the available data, ask for more.

        if (*pos == '\n')
            newline_found = true;

        /// Step over the found symbol, so it becomes the last saved byte of the object.
        ++pos;
    }

    saveUpToPosition(in, object, pos);
    loadAtPosition(in, object, pos);

    /// The last character is '\n' only if a newline was really found. When the loop stopped because
    /// there is no more data, the object holds the unterminated tail of the input and must be inserted completely.
    const size_t line_size = newline_found ? object.size() - 1 : object.size();
    column.insertData(object.data(), line_size);
}
|
|
|
|
|
|
|
|
/// Reads the next row: one line of input goes into the first (and only used) column.
/// Returns false without modifying `columns` when the input is at EOF, true after a line was appended.
/// The RowReadExtension argument is not used.
bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    const bool has_input = !in.eof();

    if (has_input)
        readLineObject(*columns[0]);

    return has_input;
}
|
|
|
|
|
|
|
|
/// Registers the input format under the name "LineAsString" in the given factory.
/// The creator forwards the header, the read buffer and the row input params to the
/// LineAsStringRowInputFormat constructor; the FormatSettings argument is ignored.
void registerInputFormatProcessorLineAsString(FormatFactory & factory)
{
    auto create_line_as_string = [](
        ReadBuffer & buf,
        const Block & sample,
        const RowInputFormatParams & params,
        const FormatSettings &)
    {
        return std::make_shared<LineAsStringRowInputFormat>(sample, buf, params);
    };

    factory.registerInputFormatProcessor("LineAsString", create_line_as_string);
}
|
|
|
|
}
|