mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #34306 from ClickHouse/line-as-string-low-performance
Fix terribly low performance of `LineAsString` format
This commit is contained in:
commit
074b827cf3
@ -216,6 +216,15 @@ void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf)
|
||||
readStringUntilCharsInto<' '>(s, buf);
|
||||
}
|
||||
|
||||
/// Reads from `buf` into `s` by forwarding to readStringUntilCharsInto with '\n' as the
/// delimiter template argument (the sibling readStringUntilWhitespaceInto passes ' ').
/// NOTE(review): presumably the '\n' itself is left unread in `buf` - confirm against
/// readStringUntilCharsInto before relying on it.
template <typename Vector>
|
||||
void readStringUntilNewlineInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
readStringUntilCharsInto<'\n'>(s, buf);
|
||||
}
|
||||
|
||||
/// Explicit instantiations of readStringUntilNewlineInto for the two destination types
/// named here: PaddedPODArray<UInt8> and String.
template void readStringUntilNewlineInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template void readStringUntilNewlineInto<String>(String & s, ReadBuffer & buf);
|
||||
|
||||
template <typename Vector>
|
||||
void readNullTerminated(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
|
@ -604,6 +604,9 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
|
||||
template <typename Vector>
|
||||
void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
/// Declaration only: reads from `buf` into `s`, using '\n' where readStringUntilWhitespaceInto uses ' '.
/// NOTE(review): only the instantiations provided alongside the definition are available - confirm
/// the set of supported `Vector` types there.
template <typename Vector>
|
||||
void readStringUntilNewlineInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
/// This can be used as the template parameter for the functions above if you just want to skip data.
|
||||
struct NullOutput
|
||||
{
|
||||
@ -1387,4 +1390,3 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf);
|
||||
void readJSONFieldIntoString(String & s, ReadBuffer & buf);
|
||||
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <Formats/JSONEachRowUtils.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -14,7 +16,8 @@ namespace ErrorCodes
|
||||
LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
|
||||
IRowInputFormat(header_, in_, std::move(params_))
|
||||
{
|
||||
if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
|
||||
if (header_.columns() != 1
|
||||
|| !typeid_cast<const ColumnString *>(header_.getByPosition(0).column.get()))
|
||||
{
|
||||
throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY);
|
||||
}
|
||||
@ -27,28 +30,16 @@ void LineAsStringRowInputFormat::resetParser()
|
||||
|
||||
void LineAsStringRowInputFormat::readLineObject(IColumn & column)
|
||||
{
|
||||
DB::Memory<> object;
|
||||
ColumnString & column_string = assert_cast<ColumnString &>(column);
|
||||
auto & chars = column_string.getChars();
|
||||
auto & offsets = column_string.getOffsets();
|
||||
|
||||
char * pos = in->position();
|
||||
bool need_more_data = true;
|
||||
readStringUntilNewlineInto(chars, *in);
|
||||
chars.push_back(0);
|
||||
offsets.push_back(chars.size());
|
||||
|
||||
while (loadAtPosition(*in, object, pos) && need_more_data)
|
||||
{
|
||||
pos = find_first_symbols<'\n'>(pos, in->buffer().end());
|
||||
if (pos == in->buffer().end())
|
||||
continue;
|
||||
|
||||
if (*pos == '\n')
|
||||
need_more_data = false;
|
||||
|
||||
++pos;
|
||||
}
|
||||
|
||||
saveUpToPosition(*in, object, pos);
|
||||
loadAtPosition(*in, object, pos);
|
||||
|
||||
/// Last character is always \n.
|
||||
column.insertData(object.data(), object.size() - 1);
|
||||
if (!in->eof())
|
||||
in->ignore(); /// Skip '\n'
|
||||
}
|
||||
|
||||
bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
|
||||
@ -57,17 +48,16 @@ bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
|
||||
return false;
|
||||
|
||||
readLineObject(*columns[0]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void registerInputFormatLineAsString(FormatFactory & factory)
|
||||
{
|
||||
factory.registerInputFormat("LineAsString", [](
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowInputFormatParams & params,
|
||||
const FormatSettings &)
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowInputFormatParams & params,
|
||||
const FormatSettings &)
|
||||
{
|
||||
return std::make_shared<LineAsStringRowInputFormat>(sample, buf, params);
|
||||
});
|
||||
@ -76,9 +66,10 @@ void registerInputFormatLineAsString(FormatFactory & factory)
|
||||
/// Registers an external schema reader under the format name "LineAsString" with `factory`.
/// The registered callback ignores its FormatSettings argument and returns a new shared
/// LinaAsStringSchemaReader instance.
/// NOTE(review): `LinaAsStringSchemaReader` looks like a misspelling of "LineAsString..."; the class
/// is declared outside this view, so confirm the declared name before renaming anything.
void registerLineAsStringSchemaReader(FormatFactory & factory)
|
||||
{
|
||||
factory.registerExternalSchemaReader("LineAsString", [](
|
||||
const FormatSettings &)
|
||||
const FormatSettings &)
|
||||
{
|
||||
return std::make_shared<LinaAsStringSchemaReader>();
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -61,4 +61,3 @@ void registerRawBLOBSchemaReader(FormatFactory & factory)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user