Minor fixes

This commit is contained in:
Robert Schulze 2024-01-07 08:25:19 +00:00
parent d54e500832
commit dd2d9ff168
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
3 changed files with 13 additions and 14 deletions

View File

@ -30,7 +30,6 @@ namespace ErrorCodes
/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode
enum class ErrorHandling
{
Throw, /// Throw exception
@ -71,7 +70,7 @@ struct IdnaEncode
{
if constexpr (error_handling == ErrorHandling::Throw)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
}
else
{
@ -96,6 +95,7 @@ struct IdnaEncode
}
};
/// Translates an ASCII-encoded IDNA string back to its UTF-8 representation.
struct IdnaDecode
{
/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
@ -113,11 +113,11 @@ struct IdnaDecode
std::string unicode;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
std::string_view value_view(value, value_length);
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t ascii_length = offsets[row] - prev_offset - 1;
std::string_view ascii_view(ascii, ascii_length);
unicode = ada::idna::to_unicode(value_view);
unicode = ada::idna::to_unicode(ascii_view);
res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
res_offsets.push_back(res_data.size());
@ -149,7 +149,7 @@ REGISTER_FUNCTION(Idna)
Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
.syntax="idnaEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncode('straße.münchen.de') AS ascii;",
@ -166,7 +166,7 @@ Computes an ASCII representation of an Internationalized Domain Name. Throws an
Computes an ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error.)",
.syntax="punycodeEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncodeOrNull('München') AS ascii;",
@ -180,7 +180,7 @@ Computes a ASCII representation of an Internationalized Domain Name. Returns an
factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
.description=R"(
Computes a Unicode representation of an Internationalized Domain Name.)",
Computes the Unicode representation of an ASCII-encoded Internationalized Domain Name.)",
.syntax="idnaDecode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",

View File

@ -37,7 +37,7 @@ enum class ErrorHandling
struct PunycodeEncode
{
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, returnn undefined output, i.e. garbage-in, garbage-out.
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
@ -60,7 +60,7 @@ struct PunycodeEncode
value_utf32.resize(value_utf32_length);
const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
if (codepoints == 0)
value_utf32.clear(); /// input was empty or it is not valid UTF-8
value_utf32.clear(); /// input was empty or is not valid UTF-8
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
if (!ok)

View File

@ -1657,7 +1657,6 @@ icudata
idempotency
idnaDecode
idnaEncode
idnaEncodeOrNull
ifNotFinite
ifNull
iframe
@ -2083,9 +2082,7 @@ pseudorandomize
psql
ptrs
punycodeDecode
punycodeDecodeOrNull
punycodeEncode
punycodeEncodeOrNull
pushdown
pwrite
py
@ -2532,6 +2529,8 @@ trimRight
trunc
tryBase
tryDecrypt
tryIdnaEncode
tryPunycodeDecode
tskv
tsv
tui