better cast operator

This commit is contained in:
Anton Popov 2021-05-06 21:21:10 +03:00
parent d4ea53f642
commit e51cd90c6f
14 changed files with 187 additions and 193 deletions

View File

@ -14,6 +14,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypeFactory.h>
#include <Columns/ColumnSet.h>
#include <Columns/ColumnConst.h>
@ -1061,10 +1062,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
Data & data)
{
DataTypePtr type = literal.data_type_hint
? literal.data_type_hint
: applyVisitor(FieldToDataType(), literal.value);
DataTypePtr type = applyVisitor(FieldToDataType(), literal.value);
const auto value = convertFieldToType(literal.value, *type);
// FIXME why do we have a second pass with a clean sample block over the same

View File

@ -76,8 +76,6 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
/// Writes the textual form of the literal's value to the output stream of `settings`.
/// When a data type hint is set, the hint's name is appended after "::".
void ASTLiteral::formatImplWithoutAlias(const FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
{
    settings.ostr << applyVisitor(FieldVisitorToString(), value);

    /// Nothing more to print for a literal without a type hint.
    if (!data_type_hint)
        return;

    settings.ostr << "::";
    settings.ostr << data_type_hint->getName();
}
}

View File

@ -34,9 +34,6 @@ public:
*/
String unique_column_name;
/// Hint for data type of literal, that can be set by operator "::".
DataTypePtr data_type_hint;
/** Get the text that identifies this element. */
/// Builds the identifier as "Literal", then the delimiter, then the dump of the stored value.
String getID(char delim) const override
{
    auto dumped_value = applyVisitor(FieldVisitorDump(), value);
    return "Literal" + (delim + dumped_value);
}

View File

@ -65,17 +65,4 @@ bool ParserKeyword::parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected)
return true;
}
/// Accepts two consecutive colon tokens.
/// `pos` is advanced past both colons only on success; no AST node is produced.
bool ParserDoubleColon::parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected)
{
    /// Work on a copy so that the position is not moved if only one colon matched.
    Pos lookahead = pos;

    if (!parser_colon.ignore(lookahead, expected))
        return false;

    if (!parser_colon.ignore(lookahead, expected))
        return false;

    pos = lookahead;
    return true;
}
}

View File

@ -56,15 +56,4 @@ public:
/// Always reports success; none of the arguments are used.
bool parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) override
{
    return true;
}
};
/// Parser for the "double colon" construct, built on top of a single-colon token parser.
class ParserDoubleColon : public IParserBase
{
    /// Matches one token of type Colon.
    ParserToken parser_colon{TokenType::Colon};

public:
    /// Human-readable name of this parser.
    const char * getName() const override
    {
        return "double colon";
    }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -814,92 +814,77 @@ ASTPtr createFunctionCast(const ASTPtr & expr_ast, const ASTPtr & type_ast)
return func_node;
}
/// Returns true if `token` equals at least one of the token types given as template arguments.
/// With an empty argument list the fold over `||` yields false.
template <TokenType ...candidates>
static bool isOneOf(TokenType token)
{
    return (... || (candidates == token));
}
bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Numbers, strings, tuples and arrays of them.
/// Types that don't have a representation in Field (e.g. Date, DateTime)
/// can't be read from text as literals.
auto is_good_token = [](const auto & token)
{
return token == TokenType::Number
|| token == TokenType::StringLiteral
|| token == TokenType::Comma
|| token == TokenType::OpeningSquareBracket
|| token == TokenType::ClosingSquareBracket
|| token == TokenType::OpeningRoundBracket
|| token == TokenType::ClosingRoundBracket;
};
auto is_number_or_string = [](const auto & type) { return isNumber(type) || isStringOrFixedString(type); };
auto is_good_type = [&is_number_or_string](const auto & type)
{
if (is_number_or_string(type))
return true;
if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
return is_number_or_string(type_array->getNestedType());
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
{
const auto & elems = type_tuple->getElements();
return std::all_of(elems.begin(), elems.end(), [&](const auto & elem) { return is_number_or_string(elem); });
}
return false;
};
/// Parse numbers (including decimals), strings and arrays of them.
const char * data_begin = pos->begin;
bool is_number_literal = pos->type == TokenType::Number;
bool is_string_literal = pos->type == TokenType::StringLiteral;
size_t skipped_tokens = 0;
while (pos.isValid() && is_good_token(pos->type))
if (pos->type == TokenType::Number || is_string_literal)
{
++pos;
++skipped_tokens;
}
else if (pos->type == TokenType::OpeningSquareBracket)
{
TokenType last_token = TokenType::OpeningSquareBracket;
while (pos.isValid())
{
if (pos->type == TokenType::OpeningSquareBracket)
{
if (!isOneOf<TokenType::OpeningSquareBracket, TokenType::Comma>(last_token))
return false;
}
else if (pos->type == TokenType::ClosingSquareBracket)
{
if (last_token == TokenType::Comma)
return false;
}
else if (pos->type == TokenType::Comma)
{
if (isOneOf<TokenType::OpeningSquareBracket, TokenType::Comma>(last_token))
return false;
}
else if (isOneOf<TokenType::Number, TokenType::StringLiteral>(pos->type))
{
if (!isOneOf<TokenType::OpeningSquareBracket, TokenType::Comma>(last_token))
return false;
}
else
{
break;
}
if (!pos.isValid())
return false;
if ((is_string_literal || is_number_literal) && skipped_tokens != 1)
return false;
last_token = pos->type;
++pos;
}
}
ASTPtr type_ast;
const char * data_end = pos->begin;
if (ParserDoubleColon().ignore(pos, expected)
if (ParserToken(TokenType::DoubleColon).ignore(pos, expected)
&& ParserDataType().parse(pos, type_ast, expected))
{
auto type = DataTypeFactory::instance().get(type_ast);
if (!is_good_type(type))
return false;
/// Allow to parse numbers only from number literals,
/// because SerializationNumber uses unsafe version of int deserialization
/// and it won't throw an exception in case of error.
if (isNumber(type) && !is_number_literal)
return false;
ReadBufferFromMemory buf(data_begin, data_end - data_begin);
auto column = type->createColumn();
try
String s;
size_t data_size = data_end - data_begin;
if (is_string_literal)
{
if (is_string_literal)
type->getDefaultSerialization()->deserializeTextQuoted(*column, buf, {});
else
type->getDefaultSerialization()->deserializeTextEscaped(*column, buf, {});
}
catch (const Exception &)
{
expected.add(pos, "literal with operator ::");
return false;
ReadBufferFromMemory buf(data_begin, data_size);
readQuotedStringWithSQLStyle(s, buf);
assert(buf.count() == data_size);
}
else
s = String(data_begin, data_size);
auto literal = std::make_shared<ASTLiteral>((*column)[0]);
literal->data_type_hint = type;
node = std::move(literal);
auto literal = std::make_shared<ASTLiteral>(std::move(s));
node = createFunctionCast(literal, type_ast);
return true;
}

View File

@ -564,7 +564,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return false;
ASTPtr type_ast;
if (ParserDoubleColon().ignore(pos, expected)
if (ParserToken(TokenType::DoubleColon).ignore(pos, expected)
&& ParserDataType().parse(pos, type_ast, expected))
{
node = createFunctionCast(expr_ast, type_ast);

View File

@ -316,7 +316,12 @@ Token Lexer::nextTokenImpl()
case '?':
return Token(TokenType::QuestionMark, token_begin, ++pos);
case ':':
return Token(TokenType::Colon, token_begin, ++pos);
{
++pos;
if (pos < end && *pos == ':')
return Token(TokenType::DoubleColon, token_begin, ++pos);
return Token(TokenType::Colon, token_begin, pos);
}
case '|':
{
++pos;

View File

@ -40,6 +40,7 @@ namespace DB
M(Arrow) /** ->. Should be distinguished from minus operator. */ \
M(QuestionMark) \
M(Colon) \
M(DoubleColon) \
M(Equals) \
M(NotEquals) \
M(Less) \

File diff suppressed because one or more lines are too long

View File

@ -1,55 +1,34 @@
0.10000000000000000000000000000000000000 Decimal(38, 38)
SELECT
\'0.10000000000000000000000000000000000000\'::Decimal(38, 38) AS c,
toTypeName(c)
[1,2,3] Array(UInt32)
SELECT
[1, 2, 3]::Array(UInt32) AS c,
toTypeName(c)
abc FixedString(3)
SELECT
\'abc\'::FixedString(3) AS c,
toTypeName(c)
123 String
SELECT
\'123\'::String AS c,
toTypeName(c)
1 Int8
SELECT
1::Int8 AS c,
toTypeName(c)
[1,2,3] Array(UInt32)
SELECT
CAST([1, 1 + 1, 1 + 2], \'Array(UInt32)\') AS c,
toTypeName(c)
2010-10-10 Date
SELECT
CAST(\'2010-10-10\', \'Date\') AS c,
toTypeName(c)
2010-10-10 00:00:00 DateTime
SELECT
CAST(\'2010-10-10\', \'DateTime\') AS c,
toTypeName(c)
['2010-10-10','2010-10-10'] Array(Date)
SELECT CAST([\'2010-10-10\', \'2010-10-10\'], \'Array(Date)\')
3 UInt32
SELECT
CAST(1 + 2, \'UInt32\') AS c,
toTypeName(c)
0.5 Float64
SELECT
CAST(\'0.1000\'::Decimal(4, 4) * 5, \'Float64\') AS c,
toTypeName(c)
0.10000000000000000000000000000000000000
SELECT CAST(\'0.1\', \'Decimal(38, 38)\') AS c
[1,2,3]
SELECT CAST(\'[1, 2, 3]\', \'Array(UInt32)\') AS c
abc
SELECT CAST(\'abc\', \'FixedString(3)\') AS c
123
SELECT CAST(\'123\', \'String\') AS c
1
SELECT CAST(\'1\', \'Int8\') AS c
[1,2,3]
SELECT CAST([1, 1 + 1, 1 + 2], \'Array(UInt32)\') AS c
2010-10-10
SELECT CAST(\'2010-10-10\', \'Date\') AS c
2010-10-10 00:00:00
SELECT CAST(\'2010-10-10\', \'DateTime\') AS c
['2010-10-10','2010-10-10']
SELECT CAST(\'[\\\'2010-10-10\\\', \\\'2010-10-10\\\']\', \'Array(Date)\')
3
SELECT CAST(1 + 2, \'UInt32\') AS c
0.5
SELECT CAST(CAST(\'0.1\', \'Decimal(4, 4)\') * 5, \'Float64\') AS c
0 UInt8
SELECT
CAST(number, \'UInt8\') AS c,
toTypeName(c)
FROM numbers(1)
1970-01-11 Date
SELECT
CAST((((0 + 1) + 2) + 3) + 4, \'Date\') AS c,
toTypeName(c)
0.6000 Decimal(4, 4)
SELECT
CAST((\'0.1000\'::Decimal(4, 4) + \'0.2000\'::Decimal(4, 4)) + \'0.3000\'::Decimal(4, 4), \'Decimal(4, 4)\') AS c,
toTypeName(c)
1970-01-11
SELECT CAST((((0 + 1) + 2) + 3) + 4, \'Date\') AS c
0.6000
SELECT CAST((CAST(\'0.1\', \'Decimal(4, 4)\') + CAST(\'0.2\', \'Decimal(4, 4)\')) + CAST(\'0.3\', \'Decimal(4, 4)\'), \'Decimal(4, 4)\') AS c
[1]
[[1,2,3],[],[1]]
[[],[]]

View File

@ -1,41 +1,45 @@
SELECT 0.1::Decimal(38, 38) AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT 0.1::Decimal(38, 38) AS c, toTypeName(c);
SELECT 0.1::Decimal(38, 38) AS c;
EXPLAIN SYNTAX SELECT 0.1::Decimal(38, 38) AS c;
SELECT [1, 2, 3]::Array(UInt32) AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT [1, 2, 3]::Array(UInt32) AS c, toTypeName(c);
SELECT [1, 2, 3]::Array(UInt32) AS c;
EXPLAIN SYNTAX SELECT [1, 2, 3]::Array(UInt32) AS c;
SELECT 'abc'::FixedString(3) AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT 'abc'::FixedString(3) AS c, toTypeName(c);
SELECT 'abc'::FixedString(3) AS c;
EXPLAIN SYNTAX SELECT 'abc'::FixedString(3) AS c;
SELECT 123::String AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT 123::String AS c, toTypeName(c);
SELECT 123::String AS c;
EXPLAIN SYNTAX SELECT 123::String AS c;
SELECT 1::Int8 AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT 1::Int8 AS c, toTypeName(c);
SELECT 1::Int8 AS c;
EXPLAIN SYNTAX SELECT 1::Int8 AS c;
SELECT [1, 1 + 1, 1 + 2]::Array(UInt32) AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT [1, 1 + 1, 1 + 2]::Array(UInt32) AS c, toTypeName(c);
SELECT [1, 1 + 1, 1 + 2]::Array(UInt32) AS c;
EXPLAIN SYNTAX SELECT [1, 1 + 1, 1 + 2]::Array(UInt32) AS c;
SELECT '2010-10-10'::Date AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT '2010-10-10'::Date AS c, toTypeName(c);
SELECT '2010-10-10'::Date AS c;
EXPLAIN SYNTAX SELECT '2010-10-10'::Date AS c;
SELECT '2010-10-10'::DateTime AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT '2010-10-10'::DateTime AS c, toTypeName(c);
SELECT '2010-10-10'::DateTime AS c;
EXPLAIN SYNTAX SELECT '2010-10-10'::DateTime AS c;
SELECT ['2010-10-10', '2010-10-10']::Array(Date) AS c, toTypeName(c);
SELECT ['2010-10-10', '2010-10-10']::Array(Date) AS c;
EXPLAIN SYNTAX SELECT ['2010-10-10', '2010-10-10']::Array(Date);
SELECT (1 + 2)::UInt32 AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT (1 + 2)::UInt32 AS c, toTypeName(c);
SELECT (1 + 2)::UInt32 AS c;
EXPLAIN SYNTAX SELECT (1 + 2)::UInt32 AS c;
SELECT (0.1::Decimal(4, 4) * 5)::Float64 AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT (0.1::Decimal(4, 4) * 5)::Float64 AS c, toTypeName(c);
SELECT (0.1::Decimal(4, 4) * 5)::Float64 AS c;
EXPLAIN SYNTAX SELECT (0.1::Decimal(4, 4) * 5)::Float64 AS c;
SELECT number::UInt8 AS c, toTypeName(c) FROM numbers(1);
EXPLAIN SYNTAX SELECT number::UInt8 AS c, toTypeName(c) FROM numbers(1);
SELECT (0 + 1 + 2 + 3 + 4)::Date AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT (0 + 1 + 2 + 3 + 4)::Date AS c, toTypeName(c);
SELECT (0 + 1 + 2 + 3 + 4)::Date AS c;
EXPLAIN SYNTAX SELECT (0 + 1 + 2 + 3 + 4)::Date AS c;
SELECT (0.1::Decimal(4, 4) + 0.2::Decimal(4, 4) + 0.3::Decimal(4, 4))::Decimal(4, 4) AS c, toTypeName(c);
EXPLAIN SYNTAX SELECT (0.1::Decimal(4, 4) + 0.2::Decimal(4, 4) + 0.3::Decimal(4, 4))::Decimal(4, 4) AS c, toTypeName(c);
SELECT (0.1::Decimal(4, 4) + 0.2::Decimal(4, 4) + 0.3::Decimal(4, 4))::Decimal(4, 4) AS c;
EXPLAIN SYNTAX SELECT (0.1::Decimal(4, 4) + 0.2::Decimal(4, 4) + 0.3::Decimal(4, 4))::Decimal(4, 4) AS c;
SELECT [[1][1]]::Array(UInt32);
SELECT [[1, 2, 3], [], [1]]::Array(Array(UInt32));
SELECT [[], []]::Array(Array(UInt32));

View File

@ -0,0 +1,10 @@
Syntax error
Syntax error
Syntax error
Syntax error
Syntax error
Syntax error
Syntax error
Syntax error
Syntax error
Code: 6

View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Runs a query that is expected to fail and prints the expected error marker
# found in the client's combined stdout/stderr.
#   $1 - query text
#   $2 - pattern to look for in the output (e.g. 'Syntax error')
# grep -m1 stops reading after the first matching line, so every case is
# checked the same way and contributes from a single line of client output.
function expect_error()
{
    local query="$1"
    local marker="$2"
    # CLICKHOUSE_CLIENT is left unquoted so that it is word-split into a command
    # plus arguments (presumably defined by shell_config.sh -- confirm).
    $CLICKHOUSE_CLIENT --query="$query" 2>&1 | grep -o -m1 "$marker"
}

# Malformed array literals before the cast operator.
expect_error "SELECT [1,]::Array(UInt8)" 'Syntax error'
expect_error "SELECT [1, 2]]::Array(UInt8)" 'Syntax error'
expect_error "SELECT [[1, 2]::Array(UInt8)" 'Syntax error'
expect_error "SELECT [[1, 2],, []]::Array(Array(UInt8))" 'Syntax error'
expect_error "SELECT [[1, 2][]]::Array(Array(UInt8))" 'Syntax error'
expect_error "SELECT [1,,2]::Array(UInt8)" 'Syntax error'
expect_error "SELECT [1 2]::Array(UInt8)" 'Syntax error'

# Two adjacent literals before the cast operator.
expect_error "SELECT 1 4::UInt32" 'Syntax error'
expect_error "SELECT '1' '4'::UInt32" 'Syntax error'

# This case is expected to produce error code 6 rather than a syntax error.
expect_error "SELECT '1''4'::UInt32" 'Code: 6'