mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Replace ParsingStage with readPrefix() and readSuffix()
This commit is contained in:
parent
e1e69771a0
commit
5956f7400f
@ -77,6 +77,8 @@ protected:
|
||||
|
||||
/// Read-only access to the block_missing_values member; returns a const reference, no copy is made.
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
|
||||
|
||||
/// Returns the current value of total_rows.
/// NOTE(review): presumably the number of rows read so far — confirm where total_rows is incremented.
size_t getTotalRows() const { return total_rows; }
|
||||
|
||||
private:
|
||||
Params params;
|
||||
|
||||
|
@ -216,43 +216,35 @@ void JSONEachRowRowInputFormat::readNestedData(const String & name, MutableColum
|
||||
|
||||
bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
|
||||
{
|
||||
/// Set flag data_in_square_brackets if data starts with '['.
|
||||
if (parsing_stage == ParsingStage::START)
|
||||
{
|
||||
parsing_stage = ParsingStage::PROCESS;
|
||||
|
||||
skipWhitespaceIfAny(in);
|
||||
if (checkChar('[', in))
|
||||
data_in_square_brackets = true;
|
||||
}
|
||||
|
||||
if (!allow_new_rows)
|
||||
return false;
|
||||
skipWhitespaceIfAny(in);
|
||||
|
||||
/// We consume ;, or \n before scanning a new row, instead scanning to next row at the end.
|
||||
/// We consume ';', ',' or '\n' before scanning a new row, instead of scanning to the next row at the end.
|
||||
/// The reason is that if we want an exact number of rows read with LIMIT x
|
||||
/// from a streaming table engine with text data format, like File or Kafka
|
||||
/// then seeking to next ;, or \n would trigger reading of an extra row at the end.
|
||||
|
||||
/// Semicolon is added for convenience as it could be used at end of INSERT query.
|
||||
if (!in.eof() && (*in.position() == ',' || *in.position() == ';'))
|
||||
++in.position();
|
||||
if (getTotalRows() && !in.eof())
|
||||
{
|
||||
if (*in.position() == ',')
|
||||
++in.position();
|
||||
else if (!data_in_square_brackets && *in.position() == ';')
|
||||
{
|
||||
/// ';' means the end of query (but it cannot be before ']')
|
||||
return allow_new_rows = false;
|
||||
}
|
||||
else if (data_in_square_brackets && *in.position() == ']')
|
||||
{
|
||||
/// ']' means the end of query
|
||||
return allow_new_rows = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish reading rows if data is in square brackets and ']' received.
|
||||
skipWhitespaceIfAny(in);
|
||||
|
||||
if (data_in_square_brackets && checkChar(']', in))
|
||||
{
|
||||
data_in_square_brackets = false;
|
||||
parsing_stage = ParsingStage::FINISH;
|
||||
if (in.eof())
|
||||
return false;
|
||||
}
|
||||
|
||||
if (in.eof() || parsing_stage == ParsingStage::FINISH)
|
||||
{
|
||||
if (data_in_square_brackets)
|
||||
throw Exception("Unexpected end of data: received end of stream instead of ']'.", ErrorCodes::INCORRECT_DATA);
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t num_columns = columns.size();
|
||||
|
||||
@ -288,6 +280,33 @@ void JSONEachRowRowInputFormat::resetParser()
|
||||
prev_positions.clear();
|
||||
}
|
||||
|
||||
void JSONEachRowRowInputFormat::readPrefix()
|
||||
{
|
||||
skipWhitespaceIfAny(in);
|
||||
if (!in.eof() && *in.position() == '[')
|
||||
{
|
||||
++in.position();
|
||||
data_in_square_brackets = true;
|
||||
}
|
||||
}
|
||||
|
||||
void JSONEachRowRowInputFormat::readSuffix()
|
||||
{
|
||||
skipWhitespaceIfAny(in);
|
||||
if (data_in_square_brackets)
|
||||
{
|
||||
assertChar(']', in);
|
||||
skipWhitespaceIfAny(in);
|
||||
}
|
||||
if (!in.eof() && *in.position() == ';')
|
||||
{
|
||||
++in.position();
|
||||
skipWhitespaceIfAny(in);
|
||||
}
|
||||
if (!in.eof())
|
||||
assertEOF(in);
|
||||
}
|
||||
|
||||
|
||||
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory)
|
||||
{
|
||||
|
@ -24,6 +24,9 @@ public:
|
||||
|
||||
/// Returns the fixed name string "JSONEachRowRowInputFormat" for this class.
String getName() const override { return "JSONEachRowRowInputFormat"; }
|
||||
|
||||
void readPrefix() override;
|
||||
void readSuffix() override;
|
||||
|
||||
bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
|
||||
/// Always returns true for this format.
/// NOTE(review): presumably signals that syncAfterError() may be called after a failed row — confirm against the base class contract.
bool allowSyncAfterError() const override { return true; }
|
||||
void syncAfterError() override;
|
||||
@ -71,15 +74,7 @@ private:
|
||||
/// This flag is needed to know if data is in square brackets.
|
||||
bool data_in_square_brackets = false;
|
||||
|
||||
/// This is needed to know the stage of parsing.
|
||||
/// Stages that readRow() moves through while parsing the input.
enum class ParsingStage
|
||||
{
|
||||
/// Initial value. On seeing this stage, readRow() switches to PROCESS and checks whether the data starts with '['.
START,
|
||||
/// Set by readRow() when it leaves START; rows are being read.
PROCESS,
|
||||
/// Set by readRow() after the closing ']' of bracketed data has been consumed; readRow() returns false in this stage.
FINISH
|
||||
};
|
||||
|
||||
ParsingStage parsing_stage = ParsingStage::START;
|
||||
bool allow_new_rows = true;
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user