ClickHouse/dbms/src/DataStreams/ValuesRowInputStream.cpp

165 lines
4.9 KiB
C++
Raw Normal View History

2011-10-30 05:19:41 +00:00
#include <DB/IO/ReadHelpers.h>
#include <DB/Interpreters/evaluateConstantExpression.h>
#include <DB/Interpreters/convertFieldToType.h>
#include <DB/Parsers/ExpressionListParsers.h>
2011-10-30 05:19:41 +00:00
#include <DB/DataStreams/ValuesRowInputStream.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/Core/FieldVisitors.h>
#include <DB/Core/Block.h>
2011-10-30 05:19:41 +00:00
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int CANNOT_PARSE_DATE;
extern const int SYNTAX_ERROR;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
}
2011-10-30 05:19:41 +00:00
ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Context & context_, bool interpret_expressions_)
: istr(istr_), context(context_), interpret_expressions(interpret_expressions_)
2011-10-30 05:19:41 +00:00
{
/// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it.
skipBOMIfExists(istr);
2011-10-30 05:19:41 +00:00
}
bool ValuesRowInputStream::read(Block & block)
2011-10-30 05:19:41 +00:00
{
size_t size = block.columns();
2011-10-30 05:19:41 +00:00
skipWhitespaceIfAny(istr);
2012-05-21 06:49:05 +00:00
if (istr.eof() || *istr.position() == ';')
return false;
2012-05-21 06:49:05 +00:00
2017-03-25 20:12:56 +00:00
/** Typically, this is the usual format for streaming parsing.
* But as an exception, it also supports processing arbitrary expressions instead of values.
* This is very inefficient. But if there are no expressions, then there is no overhead.
*/
ParserExpressionWithOptionalAlias parser(false);
assertChar('(', istr);
2011-10-30 05:19:41 +00:00
for (size_t i = 0; i < size; ++i)
{
skipWhitespaceIfAny(istr);
char * prev_istr_position = istr.position();
size_t prev_istr_bytes = istr.count() - istr.offset();
auto & col = block.getByPosition(i);
bool rollback_on_exception = false;
try
{
col.type.get()->deserializeTextQuoted(*col.column.get(), istr);
rollback_on_exception = true;
skipWhitespaceIfAny(istr);
if (i != size - 1)
assertChar(',', istr);
else
assertChar(')', istr);
}
catch (const Exception & e)
{
if (!interpret_expressions)
throw;
2017-03-25 20:12:56 +00:00
/** The normal streaming parser could not parse the value.
* Let's try to parse it with a SQL parser as a constant expression.
* This is an exceptional case.
*/
if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
|| e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
|| e.code() == ErrorCodes::CANNOT_PARSE_DATE
|| e.code() == ErrorCodes::CANNOT_PARSE_DATETIME
Text formats allow to skip errors (#407) * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Added test [#CLICKHOUSE-2778].
2017-01-27 04:29:47 +00:00
|| e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT)
{
2017-03-25 20:12:56 +00:00
/// TODO Performance if the expression does not fit entirely to the end of the buffer.
2017-03-25 20:12:56 +00:00
/// If the beginning of the value is no longer in the buffer.
if (istr.count() - istr.offset() != prev_istr_bytes)
throw;
if (rollback_on_exception)
col.column.get()->popBack(1);
IDataType & type = *block.safeGetByPosition(i).type;
IParser::Pos pos = prev_istr_position;
Expected expected = "";
IParser::Pos max_parsed_pos = pos;
ASTPtr ast;
if (!parser.parse(pos, istr.buffer().end(), ast, max_parsed_pos, expected))
throw Exception("Cannot parse expression of type " + type.getName() + " here: "
+ String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)),
ErrorCodes::SYNTAX_ERROR);
istr.position() = const_cast<char *>(max_parsed_pos);
std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, context);
Field value = convertFieldToType(value_raw.first, type, value_raw.second.get());
if (value.isNull())
{
/// Check that we are indeed allowed to insert a NULL.
bool is_null_allowed = false;
if (type.isNullable())
is_null_allowed = true;
else
{
/// NOTE: For now we support only one level of null values, i.e.
/// there are not yet such things as Array(Nullable(Array(Nullable(T))).
/// Therefore the code below is valid within the current limitations.
const auto array_type = typeid_cast<const DataTypeArray *>(&type);
if (array_type != nullptr)
{
const auto & nested_type = array_type->getMostNestedType();
if (nested_type->isNullable())
is_null_allowed = true;
}
}
if (!is_null_allowed)
Squashed commit of the following: commit c567d4e1fe8d54e6363e47548f1e3927cc5ee78f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:35:01 2017 +0300 Style [#METR-2944]. commit 26bf3e1228e03f46c29b13edb0e3770bd453e3f1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:33:11 2017 +0300 Miscellaneous [#METR-2944]. commit eb946f4c6fd4bb0e9e5c7fb1468d36be3dfca5a5 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:30:19 2017 +0300 Miscellaneous [#METR-2944]. commit 78c867a14744b5af2db8d37caf7804fc2057ea51 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:11:41 2017 +0300 Miscellaneous [#METR-2944]. commit 6604c5c83cfcedc81c8da4da026711920d5963b4 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:56:15 2017 +0300 Miscellaneous [#METR-2944]. commit 23fbf05c1d4bead636458ec21b05a101b1152e33 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:47:52 2017 +0300 Miscellaneous [#METR-2944]. commit 98772faf11a7d450d473f7fa84f8a9ae24f7b59b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:46:05 2017 +0300 Miscellaneous [#METR-2944]. commit 3dc636ab9f9359dbeac2e8d997ae563d4ca147e2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:39:46 2017 +0300 Miscellaneous [#METR-2944]. commit 3e16aee95482f374ee3eda1a4dbe9ba5cdce02e8 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:38:03 2017 +0300 Miscellaneous [#METR-2944]. commit ae7e7e90eb1f82bd0fe0f887708d08b9e7755612 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:34:15 2017 +0300 Miscellaneous [#METR-2944].
2017-01-06 17:41:19 +00:00
throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value)
+ ", that is out of range of type " + type.getName()
+ ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)),
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE};
}
col.column->insert(value);
skipWhitespaceIfAny(istr);
if (i != size - 1)
assertChar(',', istr);
else
assertChar(')', istr);
}
else
throw;
}
}
2011-10-30 05:19:41 +00:00
skipWhitespaceIfAny(istr);
if (!istr.eof() && *istr.position() == ',')
++istr.position();
return true;
2011-10-30 05:19:41 +00:00
}
}