mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #43925 from jh0x/improve-number-separator
Fixes https://github.com/ClickHouse/ClickHouse/issues/28967
This commit is contained in:
commit
ef45590413
@ -156,6 +156,20 @@ inline bool isValidIdentifier(std::string_view str)
|
||||
&& !(str.size() == strlen("null") && 0 == strncasecmp(str.data(), "null", strlen("null")));
|
||||
}
|
||||
|
||||
|
||||
inline bool isNumberSeparator(bool is_start_of_block, bool is_hex, const char * pos, const char * end)
|
||||
{
|
||||
if (*pos != '_')
|
||||
return false;
|
||||
if (is_start_of_block && *pos == '_')
|
||||
return false; // e.g. _123, 12e_3
|
||||
if (pos + 1 < end && !(is_hex ? isHexDigit(pos[1]) : isNumericASCII(pos[1])))
|
||||
return false; // e.g. 1__2, 1_., 1_e, 1_p, 1_;
|
||||
if (pos + 1 == end)
|
||||
return false; // e.g. 12_
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Works assuming isAlphaASCII.
|
||||
inline char toLowerIfAlphaASCII(char c)
|
||||
{
|
||||
|
@ -830,21 +830,65 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
if (!pos.isValid())
|
||||
return false;
|
||||
|
||||
/** Maximum length of number. 319 symbols is enough to write maximum double in decimal form.
|
||||
* Copy is needed to use strto* functions, which require 0-terminated string.
|
||||
*/
|
||||
static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
|
||||
auto try_read_float = [&](const char * it, const char * end)
|
||||
{
|
||||
char * str_end;
|
||||
errno = 0; /// Functions strto* don't clear errno.
|
||||
Float64 float_value = std::strtod(it, &str_end);
|
||||
if (str_end == end && errno != ERANGE)
|
||||
{
|
||||
if (float_value < 0)
|
||||
throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (pos->size() > MAX_LENGTH_OF_NUMBER)
|
||||
if (negative)
|
||||
float_value = -float_value;
|
||||
|
||||
res = float_value;
|
||||
|
||||
auto literal = std::make_shared<ASTLiteral>(res);
|
||||
literal->begin = literal_begin;
|
||||
literal->end = ++pos;
|
||||
node = literal;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
expected.add(pos, "number");
|
||||
return false;
|
||||
};
|
||||
|
||||
/// NaN and Inf
|
||||
if (pos->type == TokenType::BareWord)
|
||||
{
|
||||
return try_read_float(pos->begin, pos->end);
|
||||
}
|
||||
|
||||
if (pos->type != TokenType::Number)
|
||||
{
|
||||
expected.add(pos, "number");
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Maximum length of number. 319 symbols is enough to write maximum double in decimal form.
|
||||
* Copy is needed to use strto* functions, which require 0-terminated string.
|
||||
*/
|
||||
static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
|
||||
|
||||
char buf[MAX_LENGTH_OF_NUMBER + 1];
|
||||
|
||||
size_t size = pos->size();
|
||||
memcpy(buf, pos->begin, size);
|
||||
size_t buf_size = 0;
|
||||
for (const auto * it = pos->begin; it != pos->end; ++it)
|
||||
{
|
||||
if (*it != '_')
|
||||
buf[buf_size++] = *it;
|
||||
if (unlikely(buf_size > MAX_LENGTH_OF_NUMBER))
|
||||
{
|
||||
expected.add(pos, "number");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
size_t size = buf_size;
|
||||
buf[size] = 0;
|
||||
char * start_pos = buf;
|
||||
|
||||
@ -915,29 +959,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
return true;
|
||||
}
|
||||
|
||||
char * pos_double = buf;
|
||||
errno = 0; /// Functions strto* don't clear errno.
|
||||
Float64 float_value = std::strtod(buf, &pos_double);
|
||||
if (pos_double == buf + pos->size() && errno != ERANGE)
|
||||
{
|
||||
if (float_value < 0)
|
||||
throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (negative)
|
||||
float_value = -float_value;
|
||||
|
||||
res = float_value;
|
||||
|
||||
auto literal = std::make_shared<ASTLiteral>(res);
|
||||
literal->begin = literal_begin;
|
||||
literal->end = ++pos;
|
||||
node = literal;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
expected.add(pos, "number");
|
||||
return false;
|
||||
return try_read_float(buf, buf + buf_size);
|
||||
}
|
||||
|
||||
|
||||
|
@ -105,44 +105,71 @@ Token Lexer::nextTokenImpl()
|
||||
if (prev_significant_token_type == TokenType::Dot)
|
||||
{
|
||||
++pos;
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(false, false, pos, end)))
|
||||
++pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
bool start_of_block = false;
|
||||
/// 0x, 0b
|
||||
bool hex = false;
|
||||
if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B'))
|
||||
{
|
||||
bool is_valid = false;
|
||||
if (pos[1] == 'x' || pos[1] == 'X')
|
||||
hex = true;
|
||||
pos += 2;
|
||||
{
|
||||
if (isHexDigit(pos[2]))
|
||||
{
|
||||
hex = true;
|
||||
is_valid = true; // hex
|
||||
}
|
||||
}
|
||||
else if (pos[2] == '0' || pos[2] == '1')
|
||||
is_valid = true; // bin
|
||||
if (is_valid)
|
||||
{
|
||||
pos += 2;
|
||||
start_of_block = true;
|
||||
}
|
||||
else
|
||||
++pos; // consume the leading zero - could be an identifier
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
while (pos < end && ((hex ? isHexDigit(*pos) : isNumericASCII(*pos)) || isNumberSeparator(start_of_block, hex, pos, end)))
|
||||
{
|
||||
++pos;
|
||||
start_of_block = false;
|
||||
}
|
||||
|
||||
/// decimal point
|
||||
if (pos < end && *pos == '.')
|
||||
{
|
||||
start_of_block = true;
|
||||
++pos;
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
while (pos < end && ((hex ? isHexDigit(*pos) : isNumericASCII(*pos)) || isNumberSeparator(start_of_block, hex, pos, end)))
|
||||
{
|
||||
++pos;
|
||||
start_of_block = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// exponentiation (base 10 or base 2)
|
||||
if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E')))
|
||||
{
|
||||
start_of_block = true;
|
||||
++pos;
|
||||
|
||||
/// sign of exponent. It is always decimal.
|
||||
if (pos + 1 < end && (*pos == '-' || *pos == '+'))
|
||||
++pos;
|
||||
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end)))
|
||||
{
|
||||
++pos;
|
||||
start_of_block = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,21 +228,29 @@ Token Lexer::nextTokenImpl()
|
||||
|| prev_significant_token_type == TokenType::Number))
|
||||
return Token(TokenType::Dot, token_begin, ++pos);
|
||||
|
||||
bool start_of_block = true;
|
||||
++pos;
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end)))
|
||||
{
|
||||
++pos;
|
||||
start_of_block = false;
|
||||
}
|
||||
|
||||
/// exponentiation
|
||||
if (pos + 1 < end && (*pos == 'e' || *pos == 'E'))
|
||||
{
|
||||
start_of_block = true;
|
||||
++pos;
|
||||
|
||||
/// sign of exponent
|
||||
if (pos + 1 < end && (*pos == '-' || *pos == '+'))
|
||||
++pos;
|
||||
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end)))
|
||||
{
|
||||
++pos;
|
||||
start_of_block = false;
|
||||
}
|
||||
}
|
||||
|
||||
return Token(TokenType::Number, token_begin, pos);
|
||||
|
40
tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect
Executable file
40
tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/expect -f
|
||||
|
||||
set basedir [file dirname $argv0]
|
||||
set basename [file tail $argv0]
|
||||
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
|
||||
|
||||
log_user 0
|
||||
set timeout 60
|
||||
match_max 100000
|
||||
set stty_init "rows 25 cols 120"
|
||||
|
||||
expect_after {
|
||||
eof { exp_continue }
|
||||
timeout { exit 1 }
|
||||
}
|
||||
|
||||
spawn bash
|
||||
send "source $basedir/../shell_config.sh\r"
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0b'\r"
|
||||
expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0b;'\r"
|
||||
expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0b ;'\r"
|
||||
expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0x'\r"
|
||||
expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0x;'\r"
|
||||
expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
send "\$CLICKHOUSE_CLIENT --query 'select 0x ;'\r"
|
||||
expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)"
|
||||
|
||||
send "exit\r"
|
||||
expect eof
|
@ -0,0 +1,126 @@
|
||||
1234
|
||||
1234
|
||||
1234
|
||||
1234
|
||||
1234
|
||||
1234
|
||||
-1234
|
||||
-1234
|
||||
-1234
|
||||
12.34
|
||||
12.34
|
||||
12.34
|
||||
12.34
|
||||
-12.34
|
||||
-12.34
|
||||
-12.34
|
||||
-12.34
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e22
|
||||
3.4e-20
|
||||
3.4e-20
|
||||
3.4e-20
|
||||
3.4e-20
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e22
|
||||
-3.4e-20
|
||||
-3.4e-20
|
||||
-3.4e-20
|
||||
-3.4e-20
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e21
|
||||
1.34e-21
|
||||
1.34e-21
|
||||
1.34e-21
|
||||
1.34e-21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e21
|
||||
-1.34e-21
|
||||
-1.34e-21
|
||||
-1.34e-21
|
||||
-1.34e-21
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-340000000000000000000
|
||||
-3.4e-22
|
||||
-3.4e-22
|
||||
-3.4e-22
|
||||
-3.4e-22
|
||||
nan
|
||||
nan
|
||||
inf
|
||||
inf
|
||||
-inf
|
||||
inf
|
||||
inf
|
||||
-inf
|
||||
inf
|
||||
inf
|
||||
-inf
|
||||
15
|
||||
15
|
||||
15
|
||||
-15
|
||||
-15
|
||||
-15
|
||||
4660
|
||||
4660
|
||||
4660
|
||||
-4660
|
||||
-4660
|
||||
-4660
|
||||
238
|
||||
238
|
||||
1.1376953125
|
||||
1.1376953125
|
||||
-1.1376953125
|
||||
-1.1376953125
|
||||
0.9296875
|
||||
0.9296875
|
||||
2.275390625
|
||||
2.275390625
|
||||
2.275390625
|
||||
2.275390625
|
||||
2.275390625
|
||||
2.275390625
|
||||
0.56884765625
|
||||
0.56884765625
|
||||
0.56884765625
|
||||
-2.275390625
|
||||
-2.275390625
|
||||
-2.275390625
|
||||
-2.275390625
|
||||
-2.275390625
|
||||
-2.275390625
|
||||
-0.56884765625
|
||||
-0.56884765625
|
||||
-0.56884765625
|
@ -0,0 +1,154 @@
|
||||
SELECT 1234; -- Positive integer (+ implied)
|
||||
SELECT 1_234;
|
||||
SELECT 1_2_3_4;
|
||||
SELECT +1234; -- Positive integer (+ explicit)
|
||||
SELECT +1_234;
|
||||
SELECT +1_2_3_4;
|
||||
SELECT -1234; -- Negative integer
|
||||
SELECT -1_234;
|
||||
SELECT -1_2_3_4;
|
||||
SELECT 12.34; -- Positive floating point with . notation
|
||||
SELECT 12.3_4;
|
||||
SELECT 1_2.34;
|
||||
SELECT 1_2.3_4;
|
||||
SELECT -12.34; -- Negative floating point with . notation
|
||||
SELECT -12.3_4;
|
||||
SELECT -1_2.34;
|
||||
SELECT -1_2.3_4;
|
||||
SELECT 34e21; -- Positive floating point with positive scientific notation (+ implied)
|
||||
SELECT 3_4e21;
|
||||
SELECT 34e2_1;
|
||||
SELECT 3_4e2_1;
|
||||
SELECT 34e+21; -- Positive floating point with positive scientific notation (+ explicit)
|
||||
SELECT 3_4e+21;
|
||||
SELECT 34e+2_1;
|
||||
SELECT 3_4e+2_1;
|
||||
SELECT 34e-21; -- Positive floating point with negative scientific notation
|
||||
SELECT 3_4e-21;
|
||||
SELECT 34e-2_1;
|
||||
SELECT 3_4e-2_1;
|
||||
SELECT -34e21; -- Negative floating point with positive scientific notation (+ implied)
|
||||
SELECT -3_4e21;
|
||||
SELECT -34e2_1;
|
||||
SELECT -3_4e2_1;
|
||||
SELECT -34e+21; -- Negative floating point with positive scientific notation (+ explicit)
|
||||
SELECT -3_4e+21;
|
||||
SELECT -34e+2_1;
|
||||
SELECT -3_4e+2_1;
|
||||
SELECT -34e-21; -- Negative floating point with negative scientific notation
|
||||
SELECT -3_4e-21;
|
||||
SELECT -34e-2_1;
|
||||
SELECT -3_4e-2_1;
|
||||
SELECT 1.34e21; -- Positive floating point (with .) with positive scientific notation (+ implied)
|
||||
SELECT 1.3_4e21;
|
||||
SELECT 1.34e2_1;
|
||||
SELECT 1.3_4e2_1;
|
||||
SELECT 1.34e+21; -- Positive floating point (with .) with positive scientific notation (+ explicit)
|
||||
SELECT 1.3_4e+21;
|
||||
SELECT 1.34e+2_1;
|
||||
SELECT 1.3_4e+2_1;
|
||||
SELECT 1.34e-21; -- Positive floating point (with .) with negative scientific notation
|
||||
SELECT 1.3_4e-21;
|
||||
SELECT 1.34e-2_1;
|
||||
SELECT 1.3_4e-2_1;
|
||||
SELECT -1.34e21; -- Negative floating point (with .) with positive scientific notation (+ implied)
|
||||
SELECT -1.3_4e21;
|
||||
SELECT -1.34e2_1;
|
||||
SELECT -1.3_4e2_1;
|
||||
SELECT -1.34e+21; -- Negative floating point (with .) with positive scientific notation (+ explicit)
|
||||
SELECT -1.3_4e+21;
|
||||
SELECT -1.34e+2_1;
|
||||
SELECT -1.3_4e+2_1;
|
||||
SELECT -1.34e-21; -- Negative floating point (with .) with negative scientific notation
|
||||
SELECT -1.3_4e-21;
|
||||
SELECT -1.34e-2_1;
|
||||
SELECT -1.3_4e-2_1;
|
||||
SELECT -.34e21; -- Negative floating point (with .) with positive scientific notation (+ implied)
|
||||
SELECT -.3_4e21;
|
||||
SELECT -.34e2_1;
|
||||
SELECT -.3_4e2_1;
|
||||
SELECT -.34e+21; -- Negative floating point (with .) with positive scientific notation (+ explicit)
|
||||
SELECT -.3_4e+21;
|
||||
SELECT -.34e+2_1;
|
||||
SELECT -.3_4e+2_1;
|
||||
SELECT -.34e-21; -- Negative floating point (with .) with negative scientific notation
|
||||
SELECT -.3_4e-21;
|
||||
SELECT -.34e-2_1;
|
||||
SELECT -.3_4e-2_1;
|
||||
SELECT NaN; -- Specials
|
||||
SELECT nan;
|
||||
SELECT inf;
|
||||
SELECT +inf;
|
||||
SELECT -inf;
|
||||
SELECT Inf;
|
||||
SELECT +Inf;
|
||||
SELECT -Inf;
|
||||
SELECT INF;
|
||||
SELECT +INF;
|
||||
SELECT -INF;
|
||||
SELECT 0b1111; -- Binary
|
||||
SELECT 0b1_111;
|
||||
SELECT 0b1_1_1_1;
|
||||
SELECT -0b1111;
|
||||
SELECT -0b1_111;
|
||||
SELECT -0b1_1_1_1;
|
||||
SELECT 0x1234; -- Hex
|
||||
SELECT 0x1_234;
|
||||
SELECT 0x1_2_3_4;
|
||||
SELECT -0x1234;
|
||||
SELECT -0x1_234;
|
||||
SELECT -0x1_2_3_4;
|
||||
SELECT 0xee;
|
||||
SELECT 0xe_e;
|
||||
SELECT 0x1.234; -- Hex fractions
|
||||
SELECT 0x1.2_3_4;
|
||||
SELECT -0x1.234;
|
||||
SELECT -0x1.2_3_4;
|
||||
SELECT 0x0.ee;
|
||||
SELECT 0x0.e_e;
|
||||
SELECT 0x1.234p01; -- Hex scientific notation
|
||||
SELECT 0x1.2_34p01;
|
||||
SELECT 0x1.234p0_1;
|
||||
SELECT 0x1.234p+01;
|
||||
SELECT 0x1.2_34p+01;
|
||||
SELECT 0x1.2_34p+0_1;
|
||||
SELECT 0x1.234p-01;
|
||||
SELECT 0x1.2_34p-01;
|
||||
SELECT 0x1.2_34p-0_1;
|
||||
SELECT -0x1.234p01;
|
||||
SELECT -0x1.2_34p01;
|
||||
SELECT -0x1.2_34p0_1;
|
||||
SELECT -0x1.234p+01;
|
||||
SELECT -0x1.2_34p+01;
|
||||
SELECT -0x1.2_34p+0_1;
|
||||
SELECT -0x1.234p-01;
|
||||
SELECT -0x1.2_34p-01;
|
||||
SELECT -0x1.2_34p-0_1;
|
||||
|
||||
-- Things that are not a number
|
||||
|
||||
select _1000; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select _1000 FROM (SELECT 1 AS _1000) FORMAT Null;
|
||||
select -_1; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select -_1 FROM (SELECT -1 AS _1) FORMAT Null;
|
||||
select +_1; -- { clientError SYNTAX_ERROR }
|
||||
select 1__0; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1_ ; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 10_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1_e5; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1e_5; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1e5_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1e_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 1_.; -- { clientError SYNTAX_ERROR }
|
||||
select 1e_1; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0_x2; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x2_p2; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x2p_2; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x2p2_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0b; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0b ; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x ; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x_; -- { serverError UNKNOWN_IDENTIFIER }
|
||||
select 0x_1; -- { serverError UNKNOWN_IDENTIFIER }
|
Loading…
Reference in New Issue
Block a user