mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 05:22:17 +00:00
Diagnostic info for Template
This commit is contained in:
parent
a931e16c6c
commit
4e97fd697a
@ -109,6 +109,25 @@ size_t ParsedTemplateFormat::columnsCount() const
|
|||||||
return format_idx_to_column_idx.size();
|
return format_idx_to_column_idx.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the display name of a column format.
/// ColumnFormat::Default is reported as "Escaped (Default)"; every other listed
/// enumerator maps to its own name.
String ParsedTemplateFormat::formatToString(ParsedTemplateFormat::ColumnFormat format)
{
    const char * name = nullptr;

    /// No `default:` label on purpose, so the compiler can still warn about an unhandled enumerator.
    switch (format)
    {
        case ColumnFormat::Default:
            name = "Escaped (Default)";
            break;
        case ColumnFormat::Escaped:
            name = "Escaped";
            break;
        case ColumnFormat::Quoted:
            name = "Quoted";
            break;
        case ColumnFormat::Json:
            name = "Json";
            break;
        case ColumnFormat::Xml:
            name = "Xml";
            break;
        case ColumnFormat::Raw:
            name = "Raw";
            break;
    }

    /// Reaching this point without a name means `format` held a value outside the cases above.
    if (!name)
        __builtin_unreachable();

    return name;
}
|
||||||
|
|
||||||
|
|
||||||
TemplateBlockOutputStream::TemplateBlockOutputStream(WriteBuffer & ostr_, const Block & sample, const FormatSettings & settings_)
|
TemplateBlockOutputStream::TemplateBlockOutputStream(WriteBuffer & ostr_, const Block & sample, const FormatSettings & settings_)
|
||||||
|
@ -30,6 +30,7 @@ struct ParsedTemplateFormat
|
|||||||
ParsedTemplateFormat() = default;
|
ParsedTemplateFormat() = default;
|
||||||
ParsedTemplateFormat(const String & format_string, const ColumnIdxGetter & idxByName);
|
ParsedTemplateFormat(const String & format_string, const ColumnIdxGetter & idxByName);
|
||||||
static ColumnFormat stringToFormat(const String & format);
|
static ColumnFormat stringToFormat(const String & format);
|
||||||
|
static String formatToString(ColumnFormat format);
|
||||||
size_t columnsCount() const;
|
size_t columnsCount() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#include <Formats/TemplateRowInputStream.h>
|
#include <Formats/TemplateRowInputStream.h>
|
||||||
#include <Formats/FormatFactory.h>
|
#include <Formats/FormatFactory.h>
|
||||||
#include <Formats/BlockInputStreamFromRowInputStream.h>
|
#include <Formats/BlockInputStreamFromRowInputStream.h>
|
||||||
|
#include <Formats/verbosePrintString.h>
|
||||||
|
#include <IO/Operators.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -13,7 +15,7 @@ extern const int INVALID_TEMPLATE_FORMAT;
|
|||||||
|
|
||||||
TemplateRowInputStream::TemplateRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & settings_,
|
TemplateRowInputStream::TemplateRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & settings_,
|
||||||
bool ignore_spaces_)
|
bool ignore_spaces_)
|
||||||
: buf(istr_), header(header_), types(header.getDataTypes()), settings(settings_), ignore_spaces(ignore_spaces_)
|
: RowInputStreamWithDiagnosticInfo(buf, header_), buf(istr_), settings(settings_), ignore_spaces(ignore_spaces_)
|
||||||
{
|
{
|
||||||
static const String default_format("${data}");
|
static const String default_format("${data}");
|
||||||
const String & format_str = settings.template_settings.format.empty() ? default_format : settings.template_settings.format;
|
const String & format_str = settings.template_settings.format.empty() ? default_format : settings.template_settings.format;
|
||||||
@ -60,10 +62,10 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
|||||||
if (checkForSuffix())
|
if (checkForSuffix())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (row_count)
|
updateDiagnosticInfo();
|
||||||
{
|
|
||||||
|
if (likely(row_num != 1))
|
||||||
assertString(settings.template_settings.row_between_delimiter, buf);
|
assertString(settings.template_settings.row_between_delimiter, buf);
|
||||||
}
|
|
||||||
|
|
||||||
extra.read_columns.assign(columns.size(), false);
|
extra.read_columns.assign(columns.size(), false);
|
||||||
|
|
||||||
@ -73,7 +75,7 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
|||||||
assertString(row_format.delimiters[i], buf);
|
assertString(row_format.delimiters[i], buf);
|
||||||
size_t col_idx = row_format.format_idx_to_column_idx[i];
|
size_t col_idx = row_format.format_idx_to_column_idx[i];
|
||||||
skipSpaces();
|
skipSpaces();
|
||||||
deserializeField(*types[col_idx], *columns[col_idx], row_format.formats[i]);
|
deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[i]);
|
||||||
extra.read_columns[col_idx] = true;
|
extra.read_columns[col_idx] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,7 +86,6 @@ bool TemplateRowInputStream::read(MutableColumns & columns, RowReadExtension & e
|
|||||||
if (!extra.read_columns[i])
|
if (!extra.read_columns[i])
|
||||||
header.getByPosition(i).type->insertDefaultInto(*columns[i]);
|
header.getByPosition(i).type->insertDefaultInto(*columns[i]);
|
||||||
|
|
||||||
++row_count;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,6 +150,87 @@ bool TemplateRowInputStream::compareSuffixPart(StringRef & suffix, BufferBase::P
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses one row step by step and writes a description of the first problem found to `out`.
///
/// Order of checks: the between-rows delimiter (skipped when row_num == 1), then for every
/// field of row_format the delimiter in front of it and the field value itself, and finally
/// the delimiter after the last field.
///
/// Returns true if the whole row was consumed, false as soon as one check fails.
/// `max_length_of_column_name` and `max_length_of_data_type_name` are only forwarded to
/// deserializeFieldAndPrintDiagnosticInfo.
bool TemplateRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
    size_t max_length_of_column_name, size_t max_length_of_data_type_name)
{
    /// Tries to consume `delim` from the input. A DB::Exception thrown by assertString
    /// is turned into a `false` result; any other exception propagates.
    auto consume_delimiter = [this](const String & delim)
    {
        try
        {
            assertString(delim, buf);
        }
        catch (const DB::Exception &)
        {
            return false;
        }
        return true;
    };

    const String & row_separator = settings.template_settings.row_between_delimiter;
    if (likely(row_num != 1) && !consume_delimiter(row_separator))
    {
        writeErrorStringForWrongDelimiter(out, "delimiter between rows", row_separator);
        return false;
    }

    const size_t fields_in_row = row_format.columnsCount();
    for (size_t i = 0; i < fields_in_row; ++i)
    {
        skipSpaces();
        const String & leading_delimiter = row_format.delimiters[i];
        if (!consume_delimiter(leading_delimiter))
        {
            writeErrorStringForWrongDelimiter(out, "delimiter before field " + std::to_string(i), leading_delimiter);
            return false;
        }

        skipSpaces();
        const size_t col_idx = row_format.format_idx_to_column_idx[i];
        const bool field_parsed = deserializeFieldAndPrintDiagnosticInfo(
            columns, out, max_length_of_column_name, max_length_of_data_type_name, col_idx);
        if (!field_parsed)
        {
            /// Add a hint naming the format this field was expected to be written in.
            const String hint = "Maybe it's not possible to deserialize field " + std::to_string(i) +
                                " as " + ParsedTemplateFormat::formatToString(row_format.formats[i]);
            out << hint;
            return false;
        }
    }

    skipSpaces();
    const String & trailing_delimiter = row_format.delimiters.back();
    if (!consume_delimiter(trailing_delimiter))
    {
        writeErrorStringForWrongDelimiter(out, "delimiter after last field", trailing_delimiter);
        return false;
    }

    return true;
}
|
||||||
|
|
||||||
|
/// Writes one diagnostic line to `out`:
///     ERROR: There is no <description>: expected <delim>, got <input preview>
///
/// `description` says which delimiter was expected (free text supplied by the caller).
/// `delim` is the delimiter that should have been found at the current position of `buf`.
/// The preview is "<End of stream>" when buf.eof() is true; otherwise it is at most
/// delim.size() + 10 bytes starting at the current position, limited to the bytes
/// that remain in the current buffer.
void TemplateRowInputStream::writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim)
{
    out << "ERROR: There is no " << description << ": expected ";
    verbosePrintString(delim.data(), delim.data() + delim.size(), out);
    out << ", got ";
    if (buf.eof())
    {
        out << "<End of stream>";
    }
    else
    {
        /// Clamp the length, not the pointer: forming `position + delim.size() + 10` first
        /// could produce a pointer past the end of the buffer, which is undefined behaviour
        /// even if std::min then discards it.
        const size_t bytes_left = static_cast<size_t>(buf.buffer().end() - buf.position());
        const size_t preview_size = std::min(delim.size() + 10, bytes_left);
        verbosePrintString(buf.position(), buf.position() + preview_size, out);
    }
    out << '\n';
}
|
||||||
|
|
||||||
|
/// Deserializes the value of column `col_idx` from `buf` into `columns[col_idx]`, using the
/// format of the first row_format field that maps to that column.
///
/// `prev_pos` receives the buffer position before the attempt (it is set even if the call
/// throws afterwards); `curr_pos` receives the position after a successful read.
/// Throws DB::Exception with code INVALID_TEMPLATE_FORMAT if no field of row_format maps to `col_idx`.
void TemplateRowInputStream::tryDeserializeFiled(MutableColumns & columns, size_t col_idx, ReadBuffer::Position & prev_pos,
    ReadBuffer::Position & curr_pos)
{
    prev_pos = buf.position();

    /// Find which field of the row template is bound to the requested column.
    const auto & column_of_field = row_format.format_idx_to_column_idx;
    const auto found = std::find(column_of_field.cbegin(), column_of_field.cend(), col_idx);
    if (found == column_of_field.cend())
        throw DB::Exception("Parse error", ErrorCodes::INVALID_TEMPLATE_FORMAT);

    const size_t format_idx = found - column_of_field.cbegin();
    deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[format_idx]);

    curr_pos = buf.position();
}
|
||||||
|
|
||||||
|
/// Always returns false and ignores both arguments: the Template format has no separate
/// "garbage after field" check, so such input will be considered as a wrong delimiter.
bool TemplateRowInputStream::isGarbageAfterField(size_t /*after_col_idx*/, ReadBuffer::Position /*pos*/)
{
    return false;
}
|
||||||
|
|
||||||
|
|
||||||
void registerInputFormatTemplate(FormatFactory & factory)
|
void registerInputFormatTemplate(FormatFactory & factory)
|
||||||
{
|
{
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
#include <Formats/IRowInputStream.h>
|
#include <Formats/RowInputStreamWithDiagnosticInfo.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/TemplateBlockOutputStream.h>
|
#include <Formats/TemplateBlockOutputStream.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
@ -11,7 +11,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class TemplateRowInputStream : public IRowInputStream
|
class TemplateRowInputStream : public RowInputStreamWithDiagnosticInfo
|
||||||
{
|
{
|
||||||
using ColumnFormat = ParsedTemplateFormat::ColumnFormat;
|
using ColumnFormat = ParsedTemplateFormat::ColumnFormat;
|
||||||
public:
|
public:
|
||||||
@ -24,7 +24,6 @@ public:
|
|||||||
// TODO
|
// TODO
|
||||||
//bool allowSyncAfterError() const override;
|
//bool allowSyncAfterError() const override;
|
||||||
//void syncAfterError() override;
|
//void syncAfterError() override;
|
||||||
//String getDiagnosticInfo() override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format);
|
void deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format);
|
||||||
@ -32,17 +31,21 @@ private:
|
|||||||
bool checkForSuffix();
|
bool checkForSuffix();
|
||||||
bool compareSuffixPart(StringRef & suffix, BufferBase::Position pos, size_t available);
|
bool compareSuffixPart(StringRef & suffix, BufferBase::Position pos, size_t available);
|
||||||
|
|
||||||
|
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out,
|
||||||
|
size_t max_length_of_column_name, size_t max_length_of_data_type_name) override;
|
||||||
|
void tryDeserializeFiled(MutableColumns & columns, size_t col_idx,
|
||||||
|
ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
|
||||||
|
bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;
|
||||||
|
void writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim);
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
PeekableReadBuffer buf;
|
PeekableReadBuffer buf;
|
||||||
Block header;
|
|
||||||
DataTypes types;
|
|
||||||
|
|
||||||
FormatSettings settings;
|
FormatSettings settings;
|
||||||
ParsedTemplateFormat format;
|
ParsedTemplateFormat format;
|
||||||
ParsedTemplateFormat row_format;
|
ParsedTemplateFormat row_format;
|
||||||
const bool ignore_spaces;
|
const bool ignore_spaces;
|
||||||
|
|
||||||
size_t row_count = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user