mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 13:02:00 +00:00
Diagnostic info for Template
This commit is contained in:
parent
a931e16c6c
commit
4e97fd697a
@ -109,6 +109,25 @@ size_t ParsedTemplateFormat::columnsCount() const
|
||||
return format_idx_to_column_idx.size();
|
||||
}
|
||||
|
||||
String ParsedTemplateFormat::formatToString(ParsedTemplateFormat::ColumnFormat format)
|
||||
{
|
||||
switch (format)
|
||||
{
|
||||
case ColumnFormat::Default:
|
||||
return "Escaped (Default)";
|
||||
case ColumnFormat::Escaped:
|
||||
return "Escaped";
|
||||
case ColumnFormat::Quoted:
|
||||
return "Quoted";
|
||||
case ColumnFormat::Json:
|
||||
return "Json";
|
||||
case ColumnFormat::Xml:
|
||||
return "Xml";
|
||||
case ColumnFormat::Raw:
|
||||
return "Raw";
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
|
||||
TemplateBlockOutputStream::TemplateBlockOutputStream(WriteBuffer & ostr_, const Block & sample, const FormatSettings & settings_)
|
||||
|
@ -30,6 +30,7 @@ struct ParsedTemplateFormat
|
||||
ParsedTemplateFormat() = default;
|
||||
ParsedTemplateFormat(const String & format_string, const ColumnIdxGetter & idxByName);
|
||||
static ColumnFormat stringToFormat(const String & format);
|
||||
static String formatToString(ColumnFormat format);
|
||||
size_t columnsCount() const;
|
||||
};
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Formats/TemplateRowInputStream.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Formats/BlockInputStreamFromRowInputStream.h>
|
||||
#include <Formats/verbosePrintString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -13,7 +15,7 @@ extern const int INVALID_TEMPLATE_FORMAT;
|
||||
|
||||
TemplateRowInputStream::TemplateRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & settings_,
|
||||
bool ignore_spaces_)
|
||||
: buf(istr_), header(header_), types(header.getDataTypes()), settings(settings_), ignore_spaces(ignore_spaces_)
|
||||
: RowInputStreamWithDiagnosticInfo(buf, header_), buf(istr_), settings(settings_), ignore_spaces(ignore_spaces_)
|
||||
{
|
||||
static const String default_format("${data}");
|
||||
const String & format_str = settings.template_settings.format.empty() ? default_format : settings.template_settings.format;
|
||||
@ -60,10 +62,10 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
||||
if (checkForSuffix())
|
||||
return false;
|
||||
|
||||
if (row_count)
|
||||
{
|
||||
updateDiagnosticInfo();
|
||||
|
||||
if (likely(row_num != 1))
|
||||
assertString(settings.template_settings.row_between_delimiter, buf);
|
||||
}
|
||||
|
||||
extra.read_columns.assign(columns.size(), false);
|
||||
|
||||
@ -73,7 +75,7 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
||||
assertString(row_format.delimiters[i], buf);
|
||||
size_t col_idx = row_format.format_idx_to_column_idx[i];
|
||||
skipSpaces();
|
||||
deserializeField(*types[col_idx], *columns[col_idx], row_format.formats[i]);
|
||||
deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[i]);
|
||||
extra.read_columns[col_idx] = true;
|
||||
}
|
||||
|
||||
@ -84,7 +86,6 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
||||
if (!extra.read_columns[i])
|
||||
header.getByPosition(i).type->insertDefaultInto(*columns[i]);
|
||||
|
||||
++row_count;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -149,6 +150,87 @@ bool TemplateRowInputStream::compareSuffixPart(StringRef & suffix, BufferBase::P
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TemplateRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
|
||||
size_t max_length_of_column_name, size_t max_length_of_data_type_name)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (likely(row_num != 1))
|
||||
assertString(settings.template_settings.row_between_delimiter, buf);
|
||||
}
|
||||
catch (const DB::Exception &)
|
||||
{
|
||||
writeErrorStringForWrongDelimiter(out, "delimiter between rows", settings.template_settings.row_between_delimiter);
|
||||
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < row_format.columnsCount(); ++i)
|
||||
{
|
||||
skipSpaces();
|
||||
try
|
||||
{
|
||||
assertString(row_format.delimiters[i], buf);
|
||||
}
|
||||
catch (const DB::Exception &)
|
||||
{
|
||||
writeErrorStringForWrongDelimiter(out, "delimiter before field " + std::to_string(i), row_format.delimiters[i]);
|
||||
return false;
|
||||
}
|
||||
|
||||
skipSpaces();
|
||||
size_t col_idx = row_format.format_idx_to_column_idx[i];
|
||||
if (!deserializeFieldAndPrintDiagnosticInfo(columns, out, max_length_of_column_name, max_length_of_data_type_name, col_idx))
|
||||
{
|
||||
out << "Maybe it's not possible to deserialize field " + std::to_string(i) +
|
||||
" as " + ParsedTemplateFormat::formatToString(row_format.formats[i]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
skipSpaces();
|
||||
try
|
||||
{
|
||||
assertString(row_format.delimiters.back(), buf);
|
||||
}
|
||||
catch (const DB::Exception &)
|
||||
{
|
||||
writeErrorStringForWrongDelimiter(out, "delimiter after last field", row_format.delimiters.back());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void TemplateRowInputStream::writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim)
|
||||
{
|
||||
out << "ERROR: There is no " << description << ": expected ";
|
||||
verbosePrintString(delim.data(), delim.data() + delim.size(), out);
|
||||
out << ", got ";
|
||||
if (buf.eof())
|
||||
out << "<End of stream>";
|
||||
else
|
||||
verbosePrintString(buf.position(), std::min(buf.position() + delim.size() + 10, buf.buffer().end()), out);
|
||||
out << '\n';
|
||||
}
|
||||
|
||||
void TemplateRowInputStream::tryDeserializeFiled(MutableColumns & columns, size_t col_idx, ReadBuffer::Position & prev_pos,
|
||||
ReadBuffer::Position & curr_pos)
|
||||
{
|
||||
prev_pos = buf.position();
|
||||
auto format_iter = std::find(row_format.format_idx_to_column_idx.cbegin(), row_format.format_idx_to_column_idx.cend(), col_idx);
|
||||
if (format_iter == row_format.format_idx_to_column_idx.cend())
|
||||
throw DB::Exception("Parse error", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
||||
size_t format_idx = format_iter - row_format.format_idx_to_column_idx.begin();
|
||||
deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[format_idx]);
|
||||
curr_pos = buf.position();
|
||||
}
|
||||
|
||||
bool TemplateRowInputStream::isGarbageAfterField(size_t, ReadBuffer::Position)
|
||||
{
|
||||
/// Garbage will be considered as wrong delimiter
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void registerInputFormatTemplate(FormatFactory & factory)
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Formats/IRowInputStream.h>
|
||||
#include <Formats/RowInputStreamWithDiagnosticInfo.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/TemplateBlockOutputStream.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -11,7 +11,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class TemplateRowInputStream : public IRowInputStream
|
||||
class TemplateRowInputStream : public RowInputStreamWithDiagnosticInfo
|
||||
{
|
||||
using ColumnFormat = ParsedTemplateFormat::ColumnFormat;
|
||||
public:
|
||||
@ -24,7 +24,6 @@ public:
|
||||
// TODO
|
||||
//bool allowSyncAfterError() const override;
|
||||
//void syncAfterError() override;
|
||||
//String getDiagnosticInfo() override;
|
||||
|
||||
private:
|
||||
void deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format);
|
||||
@ -32,17 +31,21 @@ private:
|
||||
bool checkForSuffix();
|
||||
bool compareSuffixPart(StringRef & suffix, BufferBase::Position pos, size_t available);
|
||||
|
||||
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
|
||||
size_t max_length_of_column_name, size_t max_length_of_data_type_name) override;
|
||||
void tryDeserializeFiled(MutableColumns & columns, size_t col_idx,
|
||||
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
|
||||
bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;
|
||||
void writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim);
|
||||
|
||||
|
||||
private:
|
||||
PeekableReadBuffer buf;
|
||||
Block header;
|
||||
DataTypes types;
|
||||
|
||||
FormatSettings settings;
|
||||
ParsedTemplateFormat format;
|
||||
ParsedTemplateFormat row_format;
|
||||
const bool ignore_spaces;
|
||||
|
||||
size_t row_count = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user