Replace ParsingStage with readPrefix() and readSuffix()

This commit is contained in:
Alexander Tokmakov 2020-02-07 12:58:29 +03:00
parent e1e69771a0
commit 5956f7400f
3 changed files with 52 additions and 36 deletions

View File

@ -77,6 +77,8 @@ protected:
/// Read-only access to the block_missing_values member.
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
/// Returns the current value of the total_rows counter.
/// NOTE(review): presumably the number of rows read so far by this format - confirm where total_rows is updated.
size_t getTotalRows() const { return total_rows; }
private:
Params params;

View File

@ -216,43 +216,35 @@ void JSONEachRowRowInputFormat::readNestedData(const String & name, MutableColum
bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
{
/// Set flag data_in_square_brackets if data starts with '['.
if (parsing_stage == ParsingStage::START)
{
parsing_stage = ParsingStage::PROCESS;
skipWhitespaceIfAny(in);
if (checkChar('[', in))
data_in_square_brackets = true;
}
if (!allow_new_rows)
return false;
skipWhitespaceIfAny(in);
/// We consume ;, or \n before scanning a new row, instead scanning to next row at the end.
/// We consume ;, or \n before scanning a new row, instead of scanning to next row at the end.
/// The reason is that if we want an exact number of rows read with LIMIT x
/// from a streaming table engine with text data format, like File or Kafka
/// then seeking to next ;, or \n would trigger reading of an extra row at the end.
/// Semicolon is added for convenience as it could be used at end of INSERT query.
if (!in.eof() && (*in.position() == ',' || *in.position() == ';'))
++in.position();
if (getTotalRows() && !in.eof())
{
if (*in.position() == ',')
++in.position();
else if (!data_in_square_brackets && *in.position() == ';')
{
/// ';' means the end of query (but it cannot be before ']')
return allow_new_rows = false;
}
else if (data_in_square_brackets && *in.position() == ']')
{
/// ']' means the end of query
return allow_new_rows = false;
}
}
/// Finish reading rows if data is in square brackets and ']' received.
skipWhitespaceIfAny(in);
if (data_in_square_brackets && checkChar(']', in))
{
data_in_square_brackets = false;
parsing_stage = ParsingStage::FINISH;
if (in.eof())
return false;
}
if (in.eof() || parsing_stage == ParsingStage::FINISH)
{
if (data_in_square_brackets)
throw Exception("Unexpected end of data: received end of stream instead of ']'.", ErrorCodes::INCORRECT_DATA);
return false;
}
size_t num_columns = columns.size();
@ -288,6 +280,33 @@ void JSONEachRowRowInputFormat::resetParser()
prev_positions.clear();
}
/// Skips leading whitespace and consumes an optional opening '['.
/// When the bracket is present, data_in_square_brackets is raised; otherwise
/// the flag is left untouched and nothing but whitespace is consumed.
void JSONEachRowRowInputFormat::readPrefix()
{
    skipWhitespaceIfAny(in);

    /// No opening bracket (or no data at all): nothing more to do here.
    if (in.eof() || *in.position() != '[')
        return;

    /// Step over '[' and remember that the rows are wrapped in brackets.
    ++in.position();
    data_in_square_brackets = true;
}
/// Consumes what may follow the last row:
///  1. whitespace;
///  2. a ']' (checked with assertChar) plus whitespace, but only when
///     data_in_square_brackets is set;
///  3. an optional single ';' plus whitespace;
///  4. end of stream (checked with assertEOF if any data remains).
void JSONEachRowRowInputFormat::readSuffix()
{
    skipWhitespaceIfAny(in);

    if (data_in_square_brackets)
    {
        /// The closing bracket is expected when the flag is set.
        assertChar(']', in);
        skipWhitespaceIfAny(in);
    }

    /// A single trailing ';' is accepted and skipped.
    const bool semicolon_follows = !in.eof() && *in.position() == ';';
    if (semicolon_follows)
    {
        ++in.position();
        skipWhitespaceIfAny(in);
    }

    /// Stream exhausted: the suffix is complete.
    if (in.eof())
        return;

    /// Some data is still left; delegate the check to assertEOF.
    assertEOF(in);
}
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory)
{

View File

@ -24,6 +24,9 @@ public:
/// Returns the constant name string of this input format, "JSONEachRowRowInputFormat".
String getName() const override { return "JSONEachRowRowInputFormat"; }
void readPrefix() override;
void readSuffix() override;
bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
/// Always returns true.
/// NOTE(review): presumably tells the caller that syncAfterError() may be used to continue after a bad row - confirm against the base class.
bool allowSyncAfterError() const override { return true; }
void syncAfterError() override;
@ -71,15 +74,7 @@ private:
/// This flag is needed to know if data is in square brackets.
bool data_in_square_brackets = false;
/// This is needed to know the stage of parsing.
/// Stage of the parser as tracked by readRow().
enum class ParsingStage
{
/// Initial stage: on its first call readRow() checks for a leading '[' and switches to PROCESS.
START,
/// Set by readRow() right after leaving START; rows are read in this stage.
PROCESS,
/// Set by readRow() once the closing ']' has been consumed; readRow() returns false in this stage.
FINISH
};
ParsingStage parsing_stage = ParsingStage::START;
bool allow_new_rows = true;
};
}