mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
fix
This commit is contained in:
parent
c4ff7eec01
commit
9d2801e0d8
@ -44,8 +44,9 @@ public:
|
|||||||
"Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag",
|
"Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag",
|
||||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||||
|
|
||||||
if (!isUnsignedInteger(*arguments[0]))
|
const IDataType & length_type = *arguments[0];
|
||||||
throw Exception("First argument for function " + getName() + " must be unsigned integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
if (!isNumber(length_type))
|
||||||
|
throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
|
||||||
return std::make_shared<DataTypeString>();
|
return std::make_shared<DataTypeString>();
|
||||||
}
|
}
|
||||||
|
@ -38,8 +38,8 @@ public:
|
|||||||
|
|
||||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||||
{
|
{
|
||||||
if (!isUnsignedInteger(*arguments[0]))
|
if (!isNumber(*arguments[0]))
|
||||||
throw Exception("First argument for function " + getName() + " must be unsigned integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
|
||||||
return std::make_shared<DataTypeString>();
|
return std::make_shared<DataTypeString>();
|
||||||
}
|
}
|
||||||
@ -86,7 +86,7 @@ public:
|
|||||||
/// Generate highest byte in [0, 6]
|
/// Generate highest byte in [0, 6]
|
||||||
/// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
|
/// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
|
||||||
UInt32 code_point = (rand >> 16) * 7u;
|
UInt32 code_point = (rand >> 16) * 7u;
|
||||||
code_point &= ((-1) ^ 0xFFFF);
|
code_point &= ~0xFFFF;
|
||||||
code_point |= rand & 0xFFFF; // and other bytes obtaining in a simple way
|
code_point |= rand & 0xFFFF; // and other bytes obtaining in a simple way
|
||||||
|
|
||||||
if (code_point >= 0x40000)
|
if (code_point >= 0x40000)
|
||||||
@ -97,14 +97,9 @@ public:
|
|||||||
|
|
||||||
if (0xD7FF < code_point && code_point < 0xE000) // this range will not be valid in isValidUTF8
|
if (0xD7FF < code_point && code_point < 0xE000) // this range will not be valid in isValidUTF8
|
||||||
{
|
{
|
||||||
/* TODO(reviewer) choose with @axolm variant:
|
|
||||||
* 1. Not to do this if (isValidUTF8 can return 0)
|
|
||||||
* 2. just return 0
|
|
||||||
* 3. capture rng in lambda and do while(code_point is bad) { recalc... }
|
|
||||||
* 4. ...
|
|
||||||
* */
|
|
||||||
return 0u;
|
return 0u;
|
||||||
}
|
}
|
||||||
|
|
||||||
return code_point;
|
return code_point;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -114,7 +109,7 @@ public:
|
|||||||
size_t utf8_len = length_column.getUInt(row_num);
|
size_t utf8_len = length_column.getUInt(row_num);
|
||||||
auto * pos = data_to.data() + offset;
|
auto * pos = data_to.data() + offset;
|
||||||
|
|
||||||
size_t last_writed_bytes = 0;
|
size_t last_writen_bytes = 0;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (; i < utf8_len; i += 2)
|
for (; i < utf8_len; i += 2)
|
||||||
{
|
{
|
||||||
@ -125,13 +120,13 @@ public:
|
|||||||
|
|
||||||
/// We have padding in column buffers that we can overwrite.
|
/// We have padding in column buffers that we can overwrite.
|
||||||
pos += UTF8::convert(*reinterpret_cast<int *>(&code_point1), pos, sizeof(int));
|
pos += UTF8::convert(*reinterpret_cast<int *>(&code_point1), pos, sizeof(int));
|
||||||
last_writed_bytes = UTF8::convert(*reinterpret_cast<int *>(&code_point2), pos, sizeof(int));
|
last_writen_bytes = UTF8::convert(*reinterpret_cast<int *>(&code_point2), pos, sizeof(int));
|
||||||
pos += last_writed_bytes;
|
pos += last_writen_bytes;
|
||||||
}
|
}
|
||||||
offset = pos - data_to.data() + 1;
|
offset = pos - data_to.data() + 1;
|
||||||
if (i > utf8_len)
|
if (i > utf8_len)
|
||||||
{
|
{
|
||||||
offset -= last_writed_bytes;
|
offset -= last_writen_bytes;
|
||||||
}
|
}
|
||||||
offsets_to[row_num] = offset;
|
offsets_to[row_num] = offset;
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,5 @@
|
|||||||
SELECT randomStringUTF8('string'); -- { serverError 43 }
|
SELECT randomStringUTF8('string'); -- { serverError 43 }
|
||||||
SELECT randomStringUTF8(-10); -- { serverError 43 }
|
|
||||||
SELECT lengthUTF8(randomStringUTF8(100));
|
SELECT lengthUTF8(randomStringUTF8(100));
|
||||||
SELECT toTypeName(randomStringUTF8(10));
|
SELECT toTypeName(randomStringUTF8(10));
|
||||||
SELECT isValidUTF8(randomStringUTF8(100000));
|
SELECT isValidUTF8(randomStringUTF8(100000));
|
||||||
SELECT randomStringUTF8(0);
|
SELECT randomStringUTF8(0);
|
||||||
-- SELECT DISTINCT c > 30000 FROM (SELECT arrayJoin(arrayMap(x -> reinterpretAsUInt8(substring(randomStringUTF8(100), x + 1, 1)), range(100))) AS byte, count() AS c FROM numbers(100000) GROUP BY byte ORDER BY byte);
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user