mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-24 10:40:49 +00:00
better error messages
This commit is contained in:
parent
03c83169bd
commit
d95d53b4e4
@ -95,8 +95,8 @@ bool PeekableReadBuffer::peekNext()
|
|||||||
void PeekableReadBuffer::setCheckpoint()
|
void PeekableReadBuffer::setCheckpoint()
|
||||||
{
|
{
|
||||||
checkStateCorrect();
|
checkStateCorrect();
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (!checkpoint)
|
if (checkpoint)
|
||||||
throw DB::Exception("Does not support recursive checkpoints.", ErrorCodes::LOGICAL_ERROR);
|
throw DB::Exception("Does not support recursive checkpoints.", ErrorCodes::LOGICAL_ERROR);
|
||||||
#endif
|
#endif
|
||||||
checkpoint_in_own_memory = currentlyReadFromOwnMemory();
|
checkpoint_in_own_memory = currentlyReadFromOwnMemory();
|
||||||
@ -112,7 +112,7 @@ void PeekableReadBuffer::setCheckpoint()
|
|||||||
void PeekableReadBuffer::dropCheckpoint()
|
void PeekableReadBuffer::dropCheckpoint()
|
||||||
{
|
{
|
||||||
checkStateCorrect();
|
checkStateCorrect();
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (!checkpoint)
|
if (!checkpoint)
|
||||||
throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
|
throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
|
||||||
#endif
|
#endif
|
||||||
@ -185,7 +185,7 @@ bool PeekableReadBuffer::useSubbufferOnly() const
|
|||||||
|
|
||||||
void PeekableReadBuffer::checkStateCorrect() const
|
void PeekableReadBuffer::checkStateCorrect() const
|
||||||
{
|
{
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (checkpoint)
|
if (checkpoint)
|
||||||
{
|
{
|
||||||
if (checkpointInOwnMemory())
|
if (checkpointInOwnMemory())
|
||||||
|
@ -4,20 +4,25 @@
|
|||||||
#include <IO/ConcatReadBuffer.h>
|
#include <IO/ConcatReadBuffer.h>
|
||||||
#include <IO/PeekableReadBuffer.h>
|
#include <IO/PeekableReadBuffer.h>
|
||||||
|
|
||||||
|
void assertTrue(bool b)
|
||||||
|
{
|
||||||
|
if (!b)
|
||||||
|
throw DB::Exception("assert failed", DB::ErrorCodes::LOGICAL_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
void readAndAssert(DB::ReadBuffer & buf, const char * str)
|
void readAndAssert(DB::ReadBuffer & buf, const char * str)
|
||||||
{
|
{
|
||||||
size_t n = strlen(str);
|
size_t n = strlen(str);
|
||||||
char tmp[n];
|
char tmp[n];
|
||||||
buf.readStrict(tmp, n);
|
buf.readStrict(tmp, n);
|
||||||
assert(strncmp(tmp, str, n) == 0);
|
assertTrue(strncmp(tmp, str, n) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void assertAvailable(DB::ReadBuffer & buf, const char * str)
|
void assertAvailable(DB::ReadBuffer & buf, const char * str)
|
||||||
{
|
{
|
||||||
size_t n = strlen(str);
|
size_t n = strlen(str);
|
||||||
assert(buf.available() == n);
|
assertTrue(buf.available() == n);
|
||||||
assert(strncmp(buf.position(), str, n) == 0);
|
assertTrue(strncmp(buf.position(), str, n) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int, char **)
|
int main(int, char **)
|
||||||
@ -36,7 +41,7 @@ int main(int, char **)
|
|||||||
DB::ConcatReadBuffer concat({&b1, &b2, &b3, &b4});
|
DB::ConcatReadBuffer concat({&b1, &b2, &b3, &b4});
|
||||||
DB::PeekableReadBuffer peekable(concat, 0, 16);
|
DB::PeekableReadBuffer peekable(concat, 0, 16);
|
||||||
|
|
||||||
assert(!peekable.eof());
|
assertTrue(!peekable.eof());
|
||||||
assertAvailable(peekable, "0123456789");
|
assertAvailable(peekable, "0123456789");
|
||||||
{
|
{
|
||||||
DB::PeekableReadBufferCheckpoint checkpoint{peekable};
|
DB::PeekableReadBufferCheckpoint checkpoint{peekable};
|
||||||
@ -53,7 +58,7 @@ int main(int, char **)
|
|||||||
throw;
|
throw;
|
||||||
exception = true;
|
exception = true;
|
||||||
}
|
}
|
||||||
assert(exception);
|
assertTrue(exception);
|
||||||
assertAvailable(peekable, "56789");
|
assertAvailable(peekable, "56789");
|
||||||
|
|
||||||
readAndAssert(peekable, "56");
|
readAndAssert(peekable, "56");
|
||||||
@ -65,8 +70,8 @@ int main(int, char **)
|
|||||||
assertAvailable(peekable, "789");
|
assertAvailable(peekable, "789");
|
||||||
peekable.peekNext();
|
peekable.peekNext();
|
||||||
assertAvailable(peekable, "789qwertyuiop");
|
assertAvailable(peekable, "789qwertyuiop");
|
||||||
assert(peekable.lastPeeked().size() == 10);
|
assertTrue(peekable.lastPeeked().size() == 10);
|
||||||
assert(strncmp(peekable.lastPeeked().begin(), "asdfghjkl;", 10) == 0);
|
assertTrue(strncmp(peekable.lastPeeked().begin(), "asdfghjkl;", 10) == 0);
|
||||||
|
|
||||||
exception = false;
|
exception = false;
|
||||||
try
|
try
|
||||||
@ -80,10 +85,10 @@ int main(int, char **)
|
|||||||
throw;
|
throw;
|
||||||
exception = true;
|
exception = true;
|
||||||
}
|
}
|
||||||
assert(exception);
|
assertTrue(exception);
|
||||||
assertAvailable(peekable, "789qwertyuiop");
|
assertAvailable(peekable, "789qwertyuiop");
|
||||||
assert(peekable.lastPeeked().size() == 10);
|
assertTrue(peekable.lastPeeked().size() == 10);
|
||||||
assert(strncmp(peekable.lastPeeked().begin(), "asdfghjkl;", 10) == 0);
|
assertTrue(strncmp(peekable.lastPeeked().begin(), "asdfghjkl;", 10) == 0);
|
||||||
|
|
||||||
readAndAssert(peekable, "789qwertyu");
|
readAndAssert(peekable, "789qwertyu");
|
||||||
peekable.setCheckpoint();
|
peekable.setCheckpoint();
|
||||||
@ -93,9 +98,9 @@ int main(int, char **)
|
|||||||
|
|
||||||
peekable.setCheckpoint();
|
peekable.setCheckpoint();
|
||||||
readAndAssert(peekable, "kl;zxcvbnm,./");
|
readAndAssert(peekable, "kl;zxcvbnm,./");
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
peekable.rollbackToCheckpoint();
|
peekable.rollbackToCheckpoint();
|
||||||
readAndAssert(peekable, "kl;zxcvbnm");
|
readAndAssert(peekable, "kl;zxcvbnm");
|
||||||
peekable.dropCheckpoint();
|
peekable.dropCheckpoint();
|
||||||
@ -111,15 +116,15 @@ int main(int, char **)
|
|||||||
throw;
|
throw;
|
||||||
exception = true;
|
exception = true;
|
||||||
}
|
}
|
||||||
assert(exception);
|
assertTrue(exception);
|
||||||
|
|
||||||
auto buf_ptr = peekable.takeUnreadData();
|
auto buf_ptr = peekable.takeUnreadData();
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
assert(peekable.eof());
|
assertTrue(peekable.eof());
|
||||||
|
|
||||||
readAndAssert(*buf_ptr, ",./");
|
readAndAssert(*buf_ptr, ",./");
|
||||||
assert(buf_ptr->eof());
|
assertTrue(buf_ptr->eof());
|
||||||
|
|
||||||
peekable.assertCanBeDestructed();
|
peekable.assertCanBeDestructed();
|
||||||
}
|
}
|
||||||
|
217
dbms/src/Parsers/ParsedTemplateFormatString.cpp
Normal file
217
dbms/src/Parsers/ParsedTemplateFormatString.cpp
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
#include <Parsers/ParsedTemplateFormatString.h>
|
||||||
|
#include <Formats/verbosePrintString.h>
|
||||||
|
#include <IO/ReadBufferFromMemory.h>
|
||||||
|
#include <IO/Operators.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int INVALID_TEMPLATE_FORMAT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses `format_string` immediately.
/// Any DB::Exception raised while parsing that is not already INVALID_TEMPLATE_FORMAT
/// is re-reported through throwInvalidFormat(), so that the message carries the dump of what was parsed so far.
ParsedTemplateFormatString::ParsedTemplateFormatString(const String & format_string, const ColumnIdxGetter & idx_by_name)
{
    try
    {
        parse(format_string, idx_by_name);
    }
    catch (DB::Exception & e)
    {
        /// Already reported in the expected form: propagate untouched.
        if (e.code() == ErrorCodes::INVALID_TEMPLATE_FORMAT)
            throw;

        throwInvalidFormat(e.message(), columnsCount());
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
void ParsedTemplateFormatString::parse(const String & format_string, const ColumnIdxGetter & idx_by_name)
|
||||||
|
{
|
||||||
|
enum ParserState
|
||||||
|
{
|
||||||
|
Delimiter,
|
||||||
|
Column,
|
||||||
|
Format
|
||||||
|
};
|
||||||
|
|
||||||
|
const char * pos = format_string.c_str();
|
||||||
|
const char * end = format_string.c_str() + format_string.size();
|
||||||
|
const char * token_begin = pos;
|
||||||
|
ParserState state = Delimiter;
|
||||||
|
delimiters.emplace_back();
|
||||||
|
for (; *pos; ++pos)
|
||||||
|
{
|
||||||
|
switch (state)
|
||||||
|
{
|
||||||
|
case Delimiter:
|
||||||
|
if (*pos == '$')
|
||||||
|
{
|
||||||
|
delimiters.back().append(token_begin, pos - token_begin);
|
||||||
|
++pos;
|
||||||
|
if (*pos == '{')
|
||||||
|
{
|
||||||
|
token_begin = pos + 1;
|
||||||
|
state = Column;
|
||||||
|
}
|
||||||
|
else if (*pos == '$')
|
||||||
|
{
|
||||||
|
token_begin = pos;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throwInvalidFormat("at pos " + std::to_string(pos - format_string.c_str()) +
|
||||||
|
": expected '{' or '$' after '$'", columnsCount());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Column:
|
||||||
|
column_names.emplace_back();
|
||||||
|
pos = readMayBeQuotedColumnNameInto(pos, end - pos, column_names.back());
|
||||||
|
|
||||||
|
if (*pos == ':')
|
||||||
|
state = Format;
|
||||||
|
else if (*pos == '}')
|
||||||
|
{
|
||||||
|
formats.push_back(ColumnFormat::None);
|
||||||
|
delimiters.emplace_back();
|
||||||
|
state = Delimiter;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throwInvalidFormat("Expected ':' or '}' after column name: \"" + column_names.back() + "\"", columnsCount());
|
||||||
|
|
||||||
|
token_begin = pos + 1;
|
||||||
|
format_idx_to_column_idx.emplace_back(idx_by_name(column_names.back()));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Format:
|
||||||
|
if (*pos == '}')
|
||||||
|
{
|
||||||
|
formats.push_back(stringToFormat(String(token_begin, pos - token_begin)));
|
||||||
|
token_begin = pos + 1;
|
||||||
|
delimiters.emplace_back();
|
||||||
|
state = Delimiter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (state != Delimiter)
|
||||||
|
throwInvalidFormat("Unbalanced parentheses", columnsCount());
|
||||||
|
delimiters.back().append(token_begin, pos - token_begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Converts the textual format name of a placeholder to ColumnFormat.
/// An empty name means ColumnFormat::None. Names are matched exactly (case-sensitive);
/// an unknown name is reported through throwInvalidFormat().
ParsedTemplateFormatString::ColumnFormat ParsedTemplateFormatString::stringToFormat(const String & col_format) const
{
    if (col_format.empty() || col_format == "None")
        return ColumnFormat::None;
    if (col_format == "Escaped")
        return ColumnFormat::Escaped;
    if (col_format == "Quoted")
        return ColumnFormat::Quoted;
    if (col_format == "CSV")
        return ColumnFormat::Csv;
    if (col_format == "JSON")
        return ColumnFormat::Json;
    if (col_format == "XML")
        return ColumnFormat::Xml;
    if (col_format == "Raw")
        return ColumnFormat::Raw;

    throwInvalidFormat("Unknown field format " + col_format, columnsCount());
}
|
||||||
|
|
||||||
|
size_t ParsedTemplateFormatString::columnsCount() const
|
||||||
|
{
|
||||||
|
return format_idx_to_column_idx.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the textual name of `format`, as used in diagnostic output.
/// Every returned name is spelled exactly as stringToFormat() accepts it ("CSV", "JSON", "XML", ...),
/// so a name printed in an error message can be pasted back into a format string.
String ParsedTemplateFormatString::formatToString(ParsedTemplateFormatString::ColumnFormat format)
{
    switch (format)
    {
        case ColumnFormat::None:
            return "None";
        case ColumnFormat::Escaped:
            return "Escaped";
        case ColumnFormat::Quoted:
            return "Quoted";
        case ColumnFormat::Csv:
            return "CSV";
        case ColumnFormat::Json:
            return "JSON";
        case ColumnFormat::Xml:
            return "XML";
        case ColumnFormat::Raw:
            return "Raw";
    }
    /// All enumerators are handled above.
    __builtin_unreachable();
}
|
||||||
|
|
||||||
|
const char * ParsedTemplateFormatString::readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s)
|
||||||
|
{
|
||||||
|
s.clear();
|
||||||
|
if (!size)
|
||||||
|
return pos;
|
||||||
|
ReadBufferFromMemory buf{pos, size};
|
||||||
|
if (*pos == '"')
|
||||||
|
readDoubleQuotedStringWithSQLStyle(s, buf);
|
||||||
|
else if (*pos == '`')
|
||||||
|
readBackQuotedStringWithSQLStyle(s, buf);
|
||||||
|
else if (isWordCharASCII(*pos))
|
||||||
|
{
|
||||||
|
size_t name_size = 1;
|
||||||
|
while (name_size < size && isWordCharASCII(*(pos + name_size)))
|
||||||
|
++name_size;
|
||||||
|
s = String{pos, name_size};
|
||||||
|
return pos + name_size;
|
||||||
|
}
|
||||||
|
return pos + buf.count();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a human-readable description of what has been parsed so far: every delimiter,
/// and for every placeholder its name, the table column it is mapped to and its format.
/// The vectors may have different lengths when parsing was interrupted by an error;
/// any element that is missing is printed as "<ERROR>". This also covers a default-constructed
/// object, which has no delimiters at all.
String ParsedTemplateFormatString::dump() const
{
    WriteBufferFromOwnString res;

    /// Prints "Delimiter N: " followed by the delimiter text, or a placeholder if it does not exist.
    auto write_delimiter = [&](size_t idx)
    {
        res << "Delimiter " << idx << ": ";
        if (delimiters.size() <= idx)
            res << "<ERROR>";
        else
            verbosePrintString(delimiters[idx].c_str(), delimiters[idx].c_str() + delimiters[idx].size(), res);
    };

    write_delimiter(0);

    size_t num_columns = std::max(formats.size(), format_idx_to_column_idx.size());
    for (size_t i = 0; i < num_columns; ++i)
    {
        res << "\nColumn " << i << ": \"";
        if (column_names.size() <= i)
            res << "<ERROR>";
        else if (column_names[i].empty())
            res << "<SKIPPED>";
        else
            res << column_names[i];

        res << "\" (mapped to table column ";
        if (format_idx_to_column_idx.size() <= i)
            res << "<ERROR>";
        else if (!format_idx_to_column_idx[i])
            res << "<SKIPPED>";
        else
            res << *format_idx_to_column_idx[i];

        res << "), Format " << (i < formats.size() ? formatToString(formats[i]) : "<ERROR>");

        res << "\n";
        write_delimiter(i + 1);
    }

    return res.str();
}
|
||||||
|
|
||||||
|
void ParsedTemplateFormatString::throwInvalidFormat(const String & message, size_t column) const
|
||||||
|
{
|
||||||
|
throw Exception("Invalid format string for Template: " + message + " (near column " + std::to_string(column) +
|
||||||
|
")" + ". Parsed format string:\n" + dump() + "\n",
|
||||||
|
ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
51
dbms/src/Parsers/ParsedTemplateFormatString.h
Normal file
51
dbms/src/Parsers/ParsedTemplateFormatString.h
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Core/Types.h>
|
||||||
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
struct ParsedTemplateFormatString
|
||||||
|
{
|
||||||
|
enum class ColumnFormat
|
||||||
|
{
|
||||||
|
None,
|
||||||
|
Escaped,
|
||||||
|
Quoted,
|
||||||
|
Csv,
|
||||||
|
Json,
|
||||||
|
Xml,
|
||||||
|
Raw
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Format string has syntax: "Delimiter0 ${ColumnName0:Format0} Delimiter1 ${ColumnName1:Format1} Delimiter2"
|
||||||
|
/// The following vectors is filled with corresponding values, delimiters.size() - 1 = formats.size() = format_idx_to_column_idx.size()
|
||||||
|
/// If format_idx_to_column_idx[i] has no value, then TemplateRowInputFormat will skip i-th column.
|
||||||
|
|
||||||
|
std::vector<String> delimiters;
|
||||||
|
std::vector<ColumnFormat> formats;
|
||||||
|
std::vector<std::optional<size_t>> format_idx_to_column_idx;
|
||||||
|
|
||||||
|
/// For diagnostic info
|
||||||
|
Strings column_names;
|
||||||
|
|
||||||
|
typedef std::function<std::optional<size_t>(const String &)> ColumnIdxGetter;
|
||||||
|
|
||||||
|
ParsedTemplateFormatString() = default;
|
||||||
|
ParsedTemplateFormatString(const String & format_string, const ColumnIdxGetter & idx_by_name);
|
||||||
|
|
||||||
|
void parse(const String & format_string, const ColumnIdxGetter & idx_by_name);
|
||||||
|
|
||||||
|
ColumnFormat stringToFormat(const String & format) const;
|
||||||
|
static String formatToString(ColumnFormat format);
|
||||||
|
static const char * readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s);
|
||||||
|
size_t columnsCount() const;
|
||||||
|
|
||||||
|
String dump() const;
|
||||||
|
[[noreturn]] void throwInvalidFormat(const String & message, size_t column) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,10 +1,7 @@
|
|||||||
#include <Processors/Formats/Impl/TemplateBlockOutputFormat.h>
|
#include <Processors/Formats/Impl/TemplateBlockOutputFormat.h>
|
||||||
#include <Formats/FormatFactory.h>
|
#include <Formats/FormatFactory.h>
|
||||||
#include <Interpreters/Context.h>
|
|
||||||
#include <IO/ReadHelpers.h>
|
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <IO/ReadBufferFromMemory.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -12,157 +9,9 @@ namespace DB
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int INVALID_TEMPLATE_FORMAT;
|
extern const int SYNTAX_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
ParsedTemplateFormatString::ParsedTemplateFormatString(const String & format_string, const ColumnIdxGetter & idxByName)
|
|
||||||
{
|
|
||||||
enum ParserState
|
|
||||||
{
|
|
||||||
Delimiter,
|
|
||||||
Column,
|
|
||||||
Format
|
|
||||||
};
|
|
||||||
|
|
||||||
const char * pos = format_string.c_str();
|
|
||||||
const char * end = format_string.c_str() + format_string.size();
|
|
||||||
const char * token_begin = pos;
|
|
||||||
String column_name;
|
|
||||||
ParserState state = Delimiter;
|
|
||||||
delimiters.emplace_back();
|
|
||||||
for (; *pos; ++pos)
|
|
||||||
{
|
|
||||||
switch (state)
|
|
||||||
{
|
|
||||||
case Delimiter:
|
|
||||||
if (*pos == '$')
|
|
||||||
{
|
|
||||||
delimiters.back().append(token_begin, pos - token_begin);
|
|
||||||
++pos;
|
|
||||||
if (*pos == '{')
|
|
||||||
{
|
|
||||||
token_begin = pos + 1;
|
|
||||||
state = Column;
|
|
||||||
}
|
|
||||||
else if (*pos == '$')
|
|
||||||
{
|
|
||||||
token_begin = pos;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw Exception("Invalid template format string: pos " + std::to_string(pos - format_string.c_str()) +
|
|
||||||
": expected '{' or '$' after '$'", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Column:
|
|
||||||
pos = readMayBeQuotedColumnNameInto(pos, end - pos, column_name);
|
|
||||||
|
|
||||||
if (*pos == ':')
|
|
||||||
state = Format;
|
|
||||||
else if (*pos == '}')
|
|
||||||
{
|
|
||||||
formats.push_back(ColumnFormat::None);
|
|
||||||
delimiters.emplace_back();
|
|
||||||
state = Delimiter;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw Exception("Invalid template format string: Expected ':' or '}' after column name: \"" + column_name + "\"",
|
|
||||||
ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
|
|
||||||
token_begin = pos + 1;
|
|
||||||
format_idx_to_column_idx.emplace_back(idxByName(column_name));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Format:
|
|
||||||
if (*pos == '}')
|
|
||||||
{
|
|
||||||
formats.push_back(stringToFormat(String(token_begin, pos - token_begin)));
|
|
||||||
token_begin = pos + 1;
|
|
||||||
delimiters.emplace_back();
|
|
||||||
state = Delimiter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (state != Delimiter)
|
|
||||||
throw Exception("Invalid template format string: check parentheses balance", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
delimiters.back().append(token_begin, pos - token_begin);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
ParsedTemplateFormatString::ColumnFormat ParsedTemplateFormatString::stringToFormat(const String & col_format)
|
|
||||||
{
|
|
||||||
if (col_format.empty())
|
|
||||||
return ColumnFormat::None;
|
|
||||||
else if (col_format == "None")
|
|
||||||
return ColumnFormat::None;
|
|
||||||
else if (col_format == "Escaped")
|
|
||||||
return ColumnFormat::Escaped;
|
|
||||||
else if (col_format == "Quoted")
|
|
||||||
return ColumnFormat::Quoted;
|
|
||||||
else if (col_format == "CSV")
|
|
||||||
return ColumnFormat::Csv;
|
|
||||||
else if (col_format == "JSON")
|
|
||||||
return ColumnFormat::Json;
|
|
||||||
else if (col_format == "XML")
|
|
||||||
return ColumnFormat::Xml;
|
|
||||||
else if (col_format == "Raw")
|
|
||||||
return ColumnFormat::Raw;
|
|
||||||
else
|
|
||||||
throw Exception("Invalid template format string: unknown field format " + col_format,
|
|
||||||
ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ParsedTemplateFormatString::columnsCount() const
|
|
||||||
{
|
|
||||||
return format_idx_to_column_idx.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
String ParsedTemplateFormatString::formatToString(ParsedTemplateFormatString::ColumnFormat format)
|
|
||||||
{
|
|
||||||
switch (format)
|
|
||||||
{
|
|
||||||
case ColumnFormat::None:
|
|
||||||
return "None";
|
|
||||||
case ColumnFormat::Escaped:
|
|
||||||
return "Escaped";
|
|
||||||
case ColumnFormat::Quoted:
|
|
||||||
return "Quoted";
|
|
||||||
case ColumnFormat::Csv:
|
|
||||||
return "CSV";
|
|
||||||
case ColumnFormat::Json:
|
|
||||||
return "Json";
|
|
||||||
case ColumnFormat::Xml:
|
|
||||||
return "Xml";
|
|
||||||
case ColumnFormat::Raw:
|
|
||||||
return "Raw";
|
|
||||||
}
|
|
||||||
__builtin_unreachable();
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * ParsedTemplateFormatString::readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s)
|
|
||||||
{
|
|
||||||
s.clear();
|
|
||||||
if (!size)
|
|
||||||
return pos;
|
|
||||||
ReadBufferFromMemory buf{pos, size};
|
|
||||||
if (*pos == '"')
|
|
||||||
readDoubleQuotedStringWithSQLStyle(s, buf);
|
|
||||||
else if (*pos == '`')
|
|
||||||
readBackQuotedStringWithSQLStyle(s, buf);
|
|
||||||
else if (isWordCharASCII(*pos))
|
|
||||||
{
|
|
||||||
size_t name_size = 1;
|
|
||||||
while (name_size < size && isWordCharASCII(*(pos + name_size)))
|
|
||||||
++name_size;
|
|
||||||
s = String{pos, name_size};
|
|
||||||
return pos + name_size;
|
|
||||||
}
|
|
||||||
return pos + buf.count();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TemplateBlockOutputFormat::TemplateBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_)
|
TemplateBlockOutputFormat::TemplateBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_)
|
||||||
: IOutputFormat(header_, out_), settings(settings_)
|
: IOutputFormat(header_, out_), settings(settings_)
|
||||||
{
|
{
|
||||||
@ -185,7 +34,7 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(WriteBuffer & out_, const B
|
|||||||
for (size_t i = 0; i < format.format_idx_to_column_idx.size(); ++i)
|
for (size_t i = 0; i < format.format_idx_to_column_idx.size(); ++i)
|
||||||
{
|
{
|
||||||
if (!format.format_idx_to_column_idx[i])
|
if (!format.format_idx_to_column_idx[i])
|
||||||
throw Exception("Output part name cannot be empty, it's a bug.", ErrorCodes::LOGICAL_ERROR);
|
format.throwInvalidFormat("Output part name cannot be empty, it's a bug.", i);
|
||||||
switch (static_cast<OutputPart>(*format.format_idx_to_column_idx[i]))
|
switch (static_cast<OutputPart>(*format.format_idx_to_column_idx[i]))
|
||||||
{
|
{
|
||||||
case OutputPart::Data:
|
case OutputPart::Data:
|
||||||
@ -195,17 +44,17 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(WriteBuffer & out_, const B
|
|||||||
case OutputPart::ExtremesMin:
|
case OutputPart::ExtremesMin:
|
||||||
case OutputPart::ExtremesMax:
|
case OutputPart::ExtremesMax:
|
||||||
if (format.formats[i] != ColumnFormat::None)
|
if (format.formats[i] != ColumnFormat::None)
|
||||||
throw Exception("invalid template: wrong serialization type for data, totals, min or max",
|
format.throwInvalidFormat("Serialization type for data, totals, min and max must be empty or None", i);
|
||||||
ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (format.formats[i] == ColumnFormat::None)
|
if (format.formats[i] == ColumnFormat::None)
|
||||||
throw Exception("Serialization type for output part rows, rows_before_limit, time, rows_read or bytes_read not specified", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("Serialization type for output part rows, rows_before_limit, time, "
|
||||||
|
"rows_read or bytes_read is not specified", i);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (data_idx != 0)
|
if (data_idx != 0)
|
||||||
throw Exception("invalid template: ${data} must be the first output part", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("${data} must be the first output part", 0);
|
||||||
|
|
||||||
/// Parse format string for rows
|
/// Parse format string for rows
|
||||||
row_format = ParsedTemplateFormatString(settings.template_settings.row_format, [&](const String & colName)
|
row_format = ParsedTemplateFormatString(settings.template_settings.row_format, [&](const String & colName)
|
||||||
@ -215,13 +64,13 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(WriteBuffer & out_, const B
|
|||||||
|
|
||||||
/// Validate format string for rows
|
/// Validate format string for rows
|
||||||
if (row_format.delimiters.size() == 1)
|
if (row_format.delimiters.size() == 1)
|
||||||
throw Exception("invalid template: no columns specified", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
row_format.throwInvalidFormat("No columns specified", 0);
|
||||||
for (size_t i = 0; i < row_format.columnsCount(); ++i)
|
for (size_t i = 0; i < row_format.columnsCount(); ++i)
|
||||||
{
|
{
|
||||||
if (!row_format.format_idx_to_column_idx[i])
|
if (!row_format.format_idx_to_column_idx[i])
|
||||||
throw Exception("Cannot skip format field for output, it's a bug.", ErrorCodes::LOGICAL_ERROR);
|
row_format.throwInvalidFormat("Cannot skip format field for output, it's a bug.", i);
|
||||||
if (row_format.formats[i] == ColumnFormat::None)
|
if (row_format.formats[i] == ColumnFormat::None)
|
||||||
throw Exception("Serialization type for file column " + std::to_string(i) + " not specified", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
row_format.throwInvalidFormat("Serialization type for file column is not specified", i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -246,7 +95,7 @@ TemplateBlockOutputFormat::OutputPart TemplateBlockOutputFormat::stringToOutputP
|
|||||||
else if (part == "bytes_read")
|
else if (part == "bytes_read")
|
||||||
return OutputPart::BytesRead;
|
return OutputPart::BytesRead;
|
||||||
else
|
else
|
||||||
throw Exception("invalid template: unknown output part " + part, ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
throw Exception("Unknown output part " + part, ErrorCodes::SYNTAX_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num)
|
void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num)
|
||||||
@ -331,48 +180,48 @@ void TemplateBlockOutputFormat::finalize()
|
|||||||
|
|
||||||
size_t parts = format.format_idx_to_column_idx.size();
|
size_t parts = format.format_idx_to_column_idx.size();
|
||||||
|
|
||||||
for (size_t j = 0; j < parts; ++j)
|
for (size_t i = 0; i < parts; ++i)
|
||||||
{
|
{
|
||||||
auto type = std::make_shared<DataTypeUInt64>();
|
auto type = std::make_shared<DataTypeUInt64>();
|
||||||
ColumnWithTypeAndName col(type->createColumnConst(1, row_count), type, String("tmp"));
|
ColumnWithTypeAndName col(type->createColumnConst(1, row_count), type, String("tmp"));
|
||||||
switch (static_cast<OutputPart>(*format.format_idx_to_column_idx[j]))
|
switch (static_cast<OutputPart>(*format.format_idx_to_column_idx[i]))
|
||||||
{
|
{
|
||||||
case OutputPart::Totals:
|
case OutputPart::Totals:
|
||||||
if (!totals)
|
if (!totals)
|
||||||
throw Exception("invalid template: cannot print totals for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("Cannot print totals for this request", i);
|
||||||
writeRow(totals, 0);
|
writeRow(totals, 0);
|
||||||
break;
|
break;
|
||||||
case OutputPart::ExtremesMin:
|
case OutputPart::ExtremesMin:
|
||||||
if (!extremes)
|
if (!extremes)
|
||||||
throw Exception("invalid template: cannot print extremes for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("Cannot print extremes for this request", i);
|
||||||
writeRow(extremes, 0);
|
writeRow(extremes, 0);
|
||||||
break;
|
break;
|
||||||
case OutputPart::ExtremesMax:
|
case OutputPart::ExtremesMax:
|
||||||
if (!extremes)
|
if (!extremes)
|
||||||
throw Exception("invalid template: cannot print extremes for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("Cannot print extremes for this request", i);
|
||||||
writeRow(extremes, 1);
|
writeRow(extremes, 1);
|
||||||
break;
|
break;
|
||||||
case OutputPart::Rows:
|
case OutputPart::Rows:
|
||||||
writeValue<size_t, DataTypeUInt64>(row_count, format.formats[j]);
|
writeValue<size_t, DataTypeUInt64>(row_count, format.formats[i]);
|
||||||
break;
|
break;
|
||||||
case OutputPart::RowsBeforeLimit:
|
case OutputPart::RowsBeforeLimit:
|
||||||
if (!rows_before_limit_set)
|
if (!rows_before_limit_set)
|
||||||
throw Exception("invalid template: cannot print rows_before_limit for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("Cannot print rows_before_limit for this request", i);
|
||||||
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[j]);
|
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[i]);
|
||||||
break;
|
break;
|
||||||
case OutputPart::TimeElapsed:
|
case OutputPart::TimeElapsed:
|
||||||
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[j]);
|
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[i]);
|
||||||
break;
|
break;
|
||||||
case OutputPart::RowsRead:
|
case OutputPart::RowsRead:
|
||||||
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.formats[j]);
|
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.formats[i]);
|
||||||
break;
|
break;
|
||||||
case OutputPart::BytesRead:
|
case OutputPart::BytesRead:
|
||||||
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.formats[j]);
|
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.formats[i]);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
writeString(format.delimiters[j + 1], out);
|
writeString(format.delimiters[i + 1], out);
|
||||||
}
|
}
|
||||||
|
|
||||||
finalized = true;
|
finalized = true;
|
||||||
|
@ -4,42 +4,12 @@
|
|||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Processors/Formats/IOutputFormat.h>
|
#include <Processors/Formats/IOutputFormat.h>
|
||||||
|
#include <Parsers/ParsedTemplateFormatString.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
struct ParsedTemplateFormatString
|
|
||||||
{
|
|
||||||
enum class ColumnFormat
|
|
||||||
{
|
|
||||||
None,
|
|
||||||
Escaped,
|
|
||||||
Quoted,
|
|
||||||
Csv,
|
|
||||||
Json,
|
|
||||||
Xml,
|
|
||||||
Raw
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Format string has syntax: "Delimiter0 ${ColumnName0:Format0} Delimiter1 ${ColumnName1:Format1} Delimiter2"
|
|
||||||
/// The following vectors is filled with corresponding values, delimiters.size() - 1 = formats.size() = format_idx_to_column_idx.size()
|
|
||||||
/// If format_idx_to_column_idx[i] has no value, then TemplateRowInputStream will skip i-th column.
|
|
||||||
|
|
||||||
std::vector<String> delimiters;
|
|
||||||
std::vector<ColumnFormat> formats;
|
|
||||||
std::vector<std::optional<size_t>> format_idx_to_column_idx;
|
|
||||||
|
|
||||||
typedef std::function<std::optional<size_t>(const String &)> ColumnIdxGetter;
|
|
||||||
|
|
||||||
ParsedTemplateFormatString() = default;
|
|
||||||
ParsedTemplateFormatString(const String & format_string, const ColumnIdxGetter & idxByName);
|
|
||||||
static ColumnFormat stringToFormat(const String & format);
|
|
||||||
static String formatToString(ColumnFormat format);
|
|
||||||
static const char * readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s);
|
|
||||||
size_t columnsCount() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
class TemplateBlockOutputFormat : public IOutputFormat
|
class TemplateBlockOutputFormat : public IOutputFormat
|
||||||
{
|
{
|
||||||
using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
|
using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
|
||||||
|
@ -9,11 +9,11 @@ namespace DB
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int INVALID_TEMPLATE_FORMAT;
|
|
||||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||||
extern const int CANNOT_READ_ALL_DATA;
|
extern const int CANNOT_READ_ALL_DATA;
|
||||||
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
|
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
|
||||||
extern const int CANNOT_PARSE_QUOTED_STRING;
|
extern const int CANNOT_PARSE_QUOTED_STRING;
|
||||||
|
extern const int SYNTAX_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -30,8 +30,8 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
|
|||||||
if (partName == "data")
|
if (partName == "data")
|
||||||
return 0;
|
return 0;
|
||||||
else if (partName.empty()) /// For skipping some values in prefix and suffix
|
else if (partName.empty()) /// For skipping some values in prefix and suffix
|
||||||
return {};
|
return std::optional<size_t>();
|
||||||
throw Exception("invalid template format: unknown input part " + partName, ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
throw Exception("Unknown input part " + partName, ErrorCodes::SYNTAX_ERROR);
|
||||||
});
|
});
|
||||||
|
|
||||||
/// Validate format string for whole input
|
/// Validate format string for whole input
|
||||||
@ -41,16 +41,16 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
|
|||||||
if (format.format_idx_to_column_idx[i])
|
if (format.format_idx_to_column_idx[i])
|
||||||
{
|
{
|
||||||
if (has_data)
|
if (has_data)
|
||||||
throw Exception("${data} can occur only once", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("${data} can occur only once", i);
|
||||||
if (format.formats[i] != ColumnFormat::None)
|
if (format.formats[i] != ColumnFormat::None)
|
||||||
throw Exception("invalid template format: ${data} must have empty or None serialization type", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("${data} must have empty or None deserialization type", i);
|
||||||
has_data = true;
|
has_data = true;
|
||||||
format_data_idx = i;
|
format_data_idx = i;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (format.formats[i] == ColumnFormat::Xml || format.formats[i] == ColumnFormat::Raw)
|
if (format.formats[i] == ColumnFormat::Xml || format.formats[i] == ColumnFormat::Raw)
|
||||||
throw Exception("None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
format.throwInvalidFormat("XML and Raw deserialization is not supported", i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
|
|||||||
row_format = ParsedTemplateFormatString(settings.template_settings.row_format, [&](const String & colName) -> std::optional<size_t>
|
row_format = ParsedTemplateFormatString(settings.template_settings.row_format, [&](const String & colName) -> std::optional<size_t>
|
||||||
{
|
{
|
||||||
if (colName.empty())
|
if (colName.empty())
|
||||||
return {};
|
return std::optional<size_t>();
|
||||||
return header_.getPositionByName(colName);
|
return header_.getPositionByName(colName);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -67,17 +67,16 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
|
|||||||
for (size_t i = 0; i < row_format.columnsCount(); ++i)
|
for (size_t i = 0; i < row_format.columnsCount(); ++i)
|
||||||
{
|
{
|
||||||
if (row_format.formats[i] == ColumnFormat::Xml || row_format.formats[i] == ColumnFormat::Raw)
|
if (row_format.formats[i] == ColumnFormat::Xml || row_format.formats[i] == ColumnFormat::Raw)
|
||||||
throw Exception("invalid template format: None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
row_format.throwInvalidFormat("XML and Raw deserialization is not supported", i);
|
||||||
|
|
||||||
if (row_format.format_idx_to_column_idx[i])
|
if (row_format.format_idx_to_column_idx[i])
|
||||||
{
|
{
|
||||||
if (row_format.formats[i] == ColumnFormat::None)
|
if (row_format.formats[i] == ColumnFormat::None)
|
||||||
throw Exception("invalid template format: None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
row_format.throwInvalidFormat("Column is not skipped, but deserialization type is None", i);
|
||||||
|
|
||||||
size_t col_idx = *row_format.format_idx_to_column_idx[i];
|
size_t col_idx = *row_format.format_idx_to_column_idx[i];
|
||||||
if (column_in_format[col_idx])
|
if (column_in_format[col_idx])
|
||||||
throw Exception("invalid template format: duplicate column " + header_.getColumnsWithTypeAndName()[col_idx].name,
|
row_format.throwInvalidFormat("Duplicate column", i);
|
||||||
ErrorCodes::INVALID_TEMPLATE_FORMAT);
|
|
||||||
column_in_format[col_idx] = true;
|
column_in_format[col_idx] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,14 +84,22 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
|
|||||||
|
|
||||||
void TemplateRowInputFormat::readPrefix()
|
void TemplateRowInputFormat::readPrefix()
|
||||||
{
|
{
|
||||||
tryReadPrefixOrSuffix<void>(0, format_data_idx);
|
size_t last_successfully_parsed_idx = 0;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format_data_idx);
|
||||||
|
}
|
||||||
|
catch (Exception & e)
|
||||||
|
{
|
||||||
|
format.throwInvalidFormat(e.message() + " While parsing prefix", last_successfully_parsed_idx);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Asserts delimiters and skips fields in prefix or suffix.
|
/// Asserts delimiters and skips fields in prefix or suffix.
|
||||||
/// tryReadPrefixOrSuffix<bool>(...) is used in checkForSuffix() to avoid throwing an exception after read of each row
|
/// tryReadPrefixOrSuffix<bool>(...) is used in checkForSuffix() to avoid throwing an exception after read of each row
|
||||||
/// (most likely false will be returned on first call of checkString(...))
|
/// (most likely false will be returned on first call of checkString(...))
|
||||||
template <typename ReturnType>
|
template <typename ReturnType>
|
||||||
ReturnType TemplateRowInputFormat::tryReadPrefixOrSuffix(size_t input_part_beg, size_t input_part_end)
|
ReturnType TemplateRowInputFormat::tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end)
|
||||||
{
|
{
|
||||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||||
|
|
||||||
@ -261,9 +268,10 @@ bool TemplateRowInputFormat::checkForSuffix()
|
|||||||
{
|
{
|
||||||
PeekableReadBufferCheckpoint checkpoint{buf};
|
PeekableReadBufferCheckpoint checkpoint{buf};
|
||||||
bool suffix_found = false;
|
bool suffix_found = false;
|
||||||
|
size_t last_successfully_parsed_idx = format_data_idx + 1;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
suffix_found = tryReadPrefixOrSuffix<bool>(format_data_idx + 1, format.columnsCount());
|
suffix_found = tryReadPrefixOrSuffix<bool>(last_successfully_parsed_idx, format.columnsCount());
|
||||||
}
|
}
|
||||||
catch (const Exception & e)
|
catch (const Exception & e)
|
||||||
{
|
{
|
||||||
@ -273,8 +281,6 @@ bool TemplateRowInputFormat::checkForSuffix()
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO better diagnostic in case of invalid suffix
|
|
||||||
|
|
||||||
if (unlikely(suffix_found))
|
if (unlikely(suffix_found))
|
||||||
{
|
{
|
||||||
skipSpaces();
|
skipSpaces();
|
||||||
@ -288,6 +294,24 @@ bool TemplateRowInputFormat::checkForSuffix()
|
|||||||
|
|
||||||
bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
|
bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
|
||||||
{
|
{
|
||||||
|
out << "Suffix does not match: ";
|
||||||
|
size_t last_successfully_parsed_idx = format_data_idx + 1;
|
||||||
|
bool catched = false;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format.columnsCount());
|
||||||
|
}
|
||||||
|
catch (Exception & e)
|
||||||
|
{
|
||||||
|
out << e.message() << " Near column " << last_successfully_parsed_idx;
|
||||||
|
catched = true;
|
||||||
|
}
|
||||||
|
if (!catched)
|
||||||
|
out << " There is some data after suffix (EOF expected). ";
|
||||||
|
out << " Format string (from format_schema): \n" << format.dump() << "\n";
|
||||||
|
out << "Trying to parse next row, because suffix does not match:\n";
|
||||||
|
|
||||||
|
out << "Using format string (from format_schema_rows): " << row_format.dump() << "\n";
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (likely(row_num != 1))
|
if (likely(row_num != 1))
|
||||||
|
@ -33,7 +33,7 @@ private:
|
|||||||
inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(buf); }
|
inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(buf); }
|
||||||
|
|
||||||
template <typename ReturnType = void>
|
template <typename ReturnType = void>
|
||||||
ReturnType tryReadPrefixOrSuffix(size_t input_part_beg, size_t input_part_end);
|
ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end);
|
||||||
bool checkForSuffix();
|
bool checkForSuffix();
|
||||||
[[noreturn]] void throwUnexpectedEof();
|
[[noreturn]] void throwUnexpectedEof();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user