From 4e97fd697aa349bf36c0693dd61a8b5627e775c0 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 15 Apr 2019 05:45:57 +0300
Subject: [PATCH] Diagnostic info for Template

---
 .../src/Formats/TemplateBlockOutputStream.cpp |  19 ++++
 dbms/src/Formats/TemplateBlockOutputStream.h  |   1 +
 dbms/src/Formats/TemplateRowInputStream.cpp   | 108 +++++++++++++++++--
 dbms/src/Formats/TemplateRowInputStream.h     |  17 ++--
 4 files changed, 132 insertions(+), 13 deletions(-)

diff --git a/dbms/src/Formats/TemplateBlockOutputStream.cpp b/dbms/src/Formats/TemplateBlockOutputStream.cpp
index c43b880e349..8018ba7bfdd 100644
--- a/dbms/src/Formats/TemplateBlockOutputStream.cpp
+++ b/dbms/src/Formats/TemplateBlockOutputStream.cpp
@@ -109,6 +109,25 @@ size_t ParsedTemplateFormat::columnsCount() const
     return format_idx_to_column_idx.size();
 }
 
+String ParsedTemplateFormat::formatToString(ParsedTemplateFormat::ColumnFormat format)
+{
+    switch (format)
+    {
+        case ColumnFormat::Default:
+            return "Escaped (Default)";
+        case ColumnFormat::Escaped:
+            return "Escaped";
+        case ColumnFormat::Quoted:
+            return "Quoted";
+        case ColumnFormat::Json:
+            return "Json";
+        case ColumnFormat::Xml:
+            return "Xml";
+        case ColumnFormat::Raw:
+            return "Raw";
+    }
+    __builtin_unreachable();
+}
 
 
 TemplateBlockOutputStream::TemplateBlockOutputStream(WriteBuffer & ostr_, const Block & sample, const FormatSettings & settings_)
diff --git a/dbms/src/Formats/TemplateBlockOutputStream.h b/dbms/src/Formats/TemplateBlockOutputStream.h
index 29b79979ff1..42e9ea7820b 100644
--- a/dbms/src/Formats/TemplateBlockOutputStream.h
+++ b/dbms/src/Formats/TemplateBlockOutputStream.h
@@ -30,6 +30,7 @@ struct ParsedTemplateFormat
     ParsedTemplateFormat() = default;
     ParsedTemplateFormat(const String & format_string, const ColumnIdxGetter & idxByName);
     static ColumnFormat stringToFormat(const String & format);
+    static String formatToString(ColumnFormat format);
     size_t columnsCount() const;
 };
 
diff --git a/dbms/src/Formats/TemplateRowInputStream.cpp b/dbms/src/Formats/TemplateRowInputStream.cpp
index a85a79620b0..9be1eb5a993 100644
--- a/dbms/src/Formats/TemplateRowInputStream.cpp
+++ b/dbms/src/Formats/TemplateRowInputStream.cpp
@@ -1,6 +1,8 @@
 #include
 #include
 #include
+#include
+#include
 
 namespace DB
 {
@@ -13,7 +15,7 @@ extern const int INVALID_TEMPLATE_FORMAT;
 
 
 TemplateRowInputStream::TemplateRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & settings_, bool ignore_spaces_)
-    : buf(istr_), header(header_), types(header.getDataTypes()), settings(settings_), ignore_spaces(ignore_spaces_)
+    : RowInputStreamWithDiagnosticInfo(buf, header_), buf(istr_), settings(settings_), ignore_spaces(ignore_spaces_)
 {
     static const String default_format("${data}");
     const String & format_str = settings.template_settings.format.empty() ? default_format : settings.template_settings.format;
@@ -60,10 +62,10 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
     if (checkForSuffix())
         return false;
 
-    if (row_count)
-    {
+    updateDiagnosticInfo();
+
+    if (likely(row_num != 1))
         assertString(settings.template_settings.row_between_delimiter, buf);
-    }
 
     extra.read_columns.assign(columns.size(), false);
 
@@ -73,7 +75,7 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
         assertString(row_format.delimiters[i], buf);
         size_t col_idx = row_format.format_idx_to_column_idx[i];
         skipSpaces();
-        deserializeField(*types[col_idx], *columns[col_idx], row_format.formats[i]);
+        deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[i]);
         extra.read_columns[col_idx] = true;
     }
 
@@ -84,7 +86,6 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
         if (!extra.read_columns[i])
             header.getByPosition(i).type->insertDefaultInto(*columns[i]);
 
-    ++row_count;
     return true;
 }
 
@@ -149,6 +150,101 @@ bool TemplateRowInputStream::compareSuffixPart(StringRef & suffix, BufferBase::P
     return true;
 }
 
+/// Parses one row and writes a description of the first problem found to `out`.
+/// Checks the delimiter between rows (skipped when row_num == 1), then the delimiter and value of
+/// every template field, then the final delimiter. Returns false at the first mismatch, true otherwise.
+bool TemplateRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
+    size_t max_length_of_column_name, size_t max_length_of_data_type_name)
+{
+    try
+    {
+        if (likely(row_num != 1))
+            assertString(settings.template_settings.row_between_delimiter, buf);
+    }
+    catch (const DB::Exception &)
+    {
+        writeErrorStringForWrongDelimiter(out, "delimiter between rows", settings.template_settings.row_between_delimiter);
+
+        return false;
+    }
+    for (size_t i = 0; i < row_format.columnsCount(); ++i)
+    {
+        skipSpaces();
+        try
+        {
+            assertString(row_format.delimiters[i], buf);
+        }
+        catch (const DB::Exception &)
+        {
+            writeErrorStringForWrongDelimiter(out, "delimiter before field " + std::to_string(i), row_format.delimiters[i]);
+            return false;
+        }
+
+        skipSpaces();
+        size_t col_idx = row_format.format_idx_to_column_idx[i];
+        if (!deserializeFieldAndPrintDiagnosticInfo(columns, out, max_length_of_column_name, max_length_of_data_type_name, col_idx))
+        {
+            out << "Maybe it's not possible to deserialize field " + std::to_string(i) +
+                   " as " + ParsedTemplateFormat::formatToString(row_format.formats[i]);
+            return false;
+        }
+    }
+
+    skipSpaces();
+    try
+    {
+        assertString(row_format.delimiters.back(), buf);
+    }
+    catch (const DB::Exception &)
+    {
+        writeErrorStringForWrongDelimiter(out, "delimiter after last field", row_format.delimiters.back());
+        return false;
+    }
+
+    return true;
+}
+
+/// Writes "ERROR: There is no <description>: expected <delim>, got <input>" followed by a newline to `out`.
+/// <input> is a short preview of the unread data, or an end-of-stream marker if there is none.
+void TemplateRowInputStream::writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim)
+{
+    out << "ERROR: There is no " << description << ": expected ";
+    verbosePrintString(delim.data(), delim.data() + delim.size(), out);
+    out << ", got ";
+    if (buf.eof())
+        out << "<End of stream>";
+    else
+    {
+        /// Clamp the preview length to the data that is actually available before forming the end
+        /// pointer: computing `position() + size + 10` past the end of the buffer is undefined behaviour.
+        const size_t available = static_cast<size_t>(buf.buffer().end() - buf.position());
+        const size_t preview_size = std::min(delim.size() + 10, available);
+        verbosePrintString(buf.position(), buf.position() + preview_size, out);
+    }
+    out << '\n';
+}
+
+/// Deserializes the field of column `col_idx` using the format assigned to that column in the row template.
+/// Stores the buffer position before deserialization in `prev_pos` and, on success, the position after it
+/// in `curr_pos`. Throws INVALID_TEMPLATE_FORMAT if the row template does not reference the column.
+void TemplateRowInputStream::tryDeserializeFiled(MutableColumns & columns, size_t col_idx, ReadBuffer::Position & prev_pos,
+                                                 ReadBuffer::Position & curr_pos)
+{
+    prev_pos = buf.position();
+    auto format_iter = std::find(row_format.format_idx_to_column_idx.cbegin(), row_format.format_idx_to_column_idx.cend(), col_idx);
+    if (format_iter == row_format.format_idx_to_column_idx.cend())
+        throw DB::Exception("Parse error", ErrorCodes::INVALID_TEMPLATE_FORMAT);
+    size_t format_idx = format_iter - row_format.format_idx_to_column_idx.cbegin();
+    deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[format_idx]);
+    curr_pos = buf.position();
+}
+
+bool TemplateRowInputStream::isGarbageAfterField(size_t, ReadBuffer::Position)
+{
+    /// Garbage will be considered as wrong delimiter
+    return false;
+}
+
 
 void registerInputFormatTemplate(FormatFactory & factory)
 {
diff --git a/dbms/src/Formats/TemplateRowInputStream.h b/dbms/src/Formats/TemplateRowInputStream.h
index e0cbb45fdb9..984ded1082e 100644
--- a/dbms/src/Formats/TemplateRowInputStream.h
+++ b/dbms/src/Formats/TemplateRowInputStream.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include
-#include
+#include
 #include
 #include
 #include
@@ -11,7 +11,7 @@
 namespace DB
 {
 
-class TemplateRowInputStream : public IRowInputStream
+class TemplateRowInputStream : public RowInputStreamWithDiagnosticInfo
 {
     using ColumnFormat = ParsedTemplateFormat::ColumnFormat;
 public:
@@ -24,7 +24,6 @@ public:
     // TODO
     //bool allowSyncAfterError() const override;
     //void syncAfterError() override;
-    //String getDiagnosticInfo() override;
 
 private:
     void deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format);
@@ -32,17 +31,21 @@ private:
 
     bool checkForSuffix();
     bool compareSuffixPart(StringRef & suffix, BufferBase::Position pos, size_t available);
 
+    bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
+        size_t max_length_of_column_name, size_t max_length_of_data_type_name) override;
+    void tryDeserializeFiled(MutableColumns & columns, size_t col_idx,
+        ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
+    bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;
+    void writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim);
+
+
 private:
     PeekableReadBuffer buf;
-    Block header;
-    DataTypes types;
     FormatSettings settings;
     ParsedTemplateFormat format;
     ParsedTemplateFormat row_format;
     const bool ignore_spaces;
-
-    size_t row_count = 0;
 };
 
 }