dbms: better diagnostics of errors in TabSeparated format [#METR-15574].

This commit is contained in:
Alexey Milovidov 2015-03-29 12:02:24 +03:00
parent 3d80f45b71
commit be630f9fae
3 changed files with 49 additions and 57 deletions

View File

@ -23,11 +23,6 @@ public:
bool read(Row & row) override;
void readPrefix() override;
/** In case of an exception during parsing, you can call this function.
* It re-parses the last two rows and prints detailed information about what is happening.
*/
void printDiagnosticInfo(WriteBuffer & out);
private:
ReadBuffer & istr;
const Block sample;
@ -46,6 +41,11 @@ private:
BufferBase::Position pos_of_current_row = nullptr;
BufferBase::Position pos_of_prev_row = nullptr;
/** In case of an exception during parsing, this function is called.
* It re-parses the last two rows and prints detailed information about what is happening.
*/
void printDiagnosticInfo(WriteBuffer & out);
void updateDiagnosticInfo()
{
++row_num;

View File

@ -695,41 +695,19 @@ private:
BlockInputStreamPtr async_block_input = new AsynchronousBlockInputStream(block_input);
try
async_block_input->readPrefix();
while (true)
{
async_block_input->readPrefix();
Block block = async_block_input->read();
connection->sendData(block);
processed_rows += block.rows();
while (true)
{
Block block = async_block_input->read();
connection->sendData(block);
processed_rows += block.rows();
if (!block)
break;
}
async_block_input->readSuffix();
if (!block)
break;
}
catch (...) /// TODO Более точно
{
/** In the special case of the TabSeparated format, we can print more detailed diagnostics.
*/
BlockInputStreamFromRowInputStream * concrete_block_input = dynamic_cast<BlockInputStreamFromRowInputStream *>(block_input.get());
if (!concrete_block_input)
throw;
RowInputStreamPtr & row_input = concrete_block_input->getRowInput();
TabSeparatedRowInputStream * concrete_row_input = dynamic_cast<TabSeparatedRowInputStream *>(row_input.get());
if (!concrete_row_input)
throw;
WriteBufferFromFileDescriptor stderr_out(STDERR_FILENO);
concrete_row_input->printDiagnosticInfo(stderr_out);
throw;
}
async_block_input->readSuffix();
}

View File

@ -50,10 +50,10 @@ void TabSeparatedRowInputStream::readPrefix()
/** Throws an Exception with code ErrorCodes::INCORRECT_DATA when the byte at the current read position of `istr`,
  * or the byte just before it (only checked when the position is not at the start of the buffer),
  * is a carriage return ('\r').
  * The message tells the user that DOS/Windows style line separators are illegal in TabSeparated format
  * and that a carriage return wanted inside a value must be escaped as \r.
  * NOTE(review): this span comes from a diff hunk rendered without its add/remove markers, so the message lines
  * without and with the leading "\n" appear to be the old and the new variant respectively - confirm against the repository.
  */
static void checkForCarriageReturn(ReadBuffer & istr)
{
if (istr.position()[0] == '\r' || (istr.position() != istr.buffer().begin() && istr.position()[-1] == '\r'))
throw Exception("You have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
" It's like your input data have DOS/Windows style line separators, that are illegal in TabSeparated format."
throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
"\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
" You must transform your file to Unix format."
" But if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
"\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
ErrorCodes::INCORRECT_DATA);
}
@ -65,29 +65,43 @@ bool TabSeparatedRowInputStream::read(Row & row)
size_t size = data_types.size();
row.resize(size);
for (size_t i = 0; i < size; ++i)
try
{
if (i == 0 && istr.eof())
for (size_t i = 0; i < size; ++i)
{
row.clear();
return false;
}
data_types[i]->deserializeTextEscaped(row[i], istr);
/// skip the delimiters
if (i + 1 == size)
{
if (!istr.eof())
if (i == 0 && istr.eof())
{
if (unlikely(row_num == 1))
checkForCarriageReturn(istr);
assertString("\n", istr);
row.clear();
return false;
}
data_types[i]->deserializeTextEscaped(row[i], istr);
/// skip the delimiters
if (i + 1 == size)
{
if (!istr.eof())
{
if (unlikely(row_num == 1))
checkForCarriageReturn(istr);
assertString("\n", istr);
}
}
else
assertString("\t", istr);
}
else
assertString("\t", istr);
}
catch (Exception & e)
{
String verbose_diagnostic;
{
WriteBufferFromString diagnostic_out(verbose_diagnostic);
printDiagnosticInfo(diagnostic_out);
}
e.addMessage("\n" + verbose_diagnostic);
throw;
}
return true;