Merge pull request #58454 from rschu1ze/punycode-revert-revert

Implement IDNA and punycode encoding/decoding (2nd attempt)
This commit is contained in:
Robert Schulze 2024-01-08 14:26:03 +01:00 committed by GitHub
commit fb4fd29110
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 954 additions and 4 deletions

3
.gitmodules vendored
View File

@ -360,3 +360,6 @@
[submodule "contrib/sqids-cpp"] [submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git url = https://github.com/sqids/sqids-cpp.git
[submodule "contrib/idna"]
path = contrib/idna
url = https://github.com/ada-url/idna.git

View File

@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq) add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft) add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float) add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)
add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin) add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp) add_contrib (sqids-cpp-cmake sqids-cpp)

1
contrib/idna vendored Submodule

@ -0,0 +1 @@
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667

View File

@ -0,0 +1,24 @@
option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
if ((NOT ENABLE_IDNA))
message (STATUS "Not using idna")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
set (SRCS
"${LIBRARY_DIR}/src/idna.cpp"
"${LIBRARY_DIR}/src/mapping.cpp"
"${LIBRARY_DIR}/src/mapping_tables.cpp"
"${LIBRARY_DIR}/src/normalization.cpp"
"${LIBRARY_DIR}/src/normalization_tables.cpp"
"${LIBRARY_DIR}/src/punycode.cpp"
"${LIBRARY_DIR}/src/to_ascii.cpp"
"${LIBRARY_DIR}/src/to_unicode.cpp"
"${LIBRARY_DIR}/src/unicode_transcoding.cpp"
"${LIBRARY_DIR}/src/validity.cpp"
)
add_library (_idna ${SRCS})
target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
add_library (ch_contrib::idna ALIAS _idna)

View File

@ -1383,6 +1383,148 @@ Result:
└──────────────────┘ └──────────────────┘
``` ```
## punycodeEncode
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string.
The string must be UTF8-encoded, otherwise the behavior is undefined.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Punycode representation of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeEncode('München');
```
Result:
```result
┌─punycodeEncode('München')─┐
│ Mnchen-3ya │
└───────────────────────────┘
```
## punycodeDecode
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
If no valid Punycode-encoded string is given, an exception is thrown.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
- The plaintext of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeDecode('Mnchen-3ya');
```
Result:
```result
┌─punycodeDecode('Mnchen-3ya')─┐
│ München │
└──────────────────────────────┘
```
## tryPunycodeDecode
Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded string is given.
## idnaEncode
Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown.
Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
**Syntax**
```sql
idnaEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A ASCII representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
**Example**
``` sql
select idnaEncode('straße.münchen.de');
```
Result:
```result
┌─idnaEncode('straße.münchen.de')─────┐
│ xn--strae-oqa.xn--mnchen-3ya.de │
└─────────────────────────────────────┘
```
## tryIdnaEncode
Like `idnaEncode` but returns an empty string in case of an error instead of throwing an exception.
## idnaDecode
Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
In case of an error (e.g. because the input is invalid), the input string is returned.
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
**Syntax**
```sql
idnaDecode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Unicode (UTF-8) representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
**Example**
``` sql
select idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de');
```
Result:
```result
┌─idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de')─┐
│ straße.münchen.de │
└───────────────────────────────────────────────┘
```
## byteHammingDistance ## byteHammingDistance
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings. Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.

View File

@ -28,6 +28,7 @@
#cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS #cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_SQIDS #cmakedefine01 USE_SQIDS
#cmakedefine01 USE_IDNA
#cmakedefine01 USE_NLP #cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING #cmakedefine01 USE_LIBURING

View File

@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
list (APPEND PRIVATE_LIBS ch_contrib::sqids) list (APPEND PRIVATE_LIBS ch_contrib::sqids)
endif() endif()
if (TARGET ch_contrib::idna)
list (APPEND PRIVATE_LIBS ch_contrib::idna)
endif()
if (TARGET ch_contrib::h3) if (TARGET ch_contrib::h3)
list (APPEND PRIVATE_LIBS ch_contrib::h3) list (APPEND PRIVATE_LIBS ch_contrib::h3)
endif() endif()

202
src/Functions/idna.cpp Normal file
View File

@ -0,0 +1,202 @@
#include "config.h"
#if USE_IDNA
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
#endif
# include <ada/idna/to_ascii.h>
# include <ada/idna/to_unicode.h>
# include <ada/idna/unicode_transcoding.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
/// Implementation of
/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode
enum class ErrorHandling
{
Throw, /// Throw exception
Empty /// Return empty string
};
/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs.
template <ErrorHandling error_handling>
struct IdnaEncode
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::string ascii;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
std::string_view value_view(value, value_length);
if (!value_view.empty()) /// to_ascii() expects non-empty input
{
ascii = ada::idna::to_ascii(value_view);
const bool ok = !ascii.empty();
if (!ok)
{
if constexpr (error_handling == ErrorHandling::Throw)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
}
else
{
static_assert(error_handling == ErrorHandling::Empty);
ascii.clear();
}
}
}
res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
ascii.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
/// Translates an ASII-encoded IDNA string back to its UTF-8 representation.
struct IdnaDecode
{
/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::string unicode;
for (size_t row = 0; row < rows; ++row)
{
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t ascii_length = offsets[row] - prev_offset - 1;
std::string_view ascii_view(ascii, ascii_length);
unicode = ada::idna::to_unicode(ascii_view);
res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
unicode.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
struct NameIdnaEncode { static constexpr auto name = "idnaEncode"; };
struct NameTryIdnaEncode { static constexpr auto name = "tryIdnaEncode"; };
struct NameIdnaDecode { static constexpr auto name = "idnaDecode"; };
using FunctionIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Throw>, NameIdnaEncode>;
using FunctionTryIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Empty>, NameTryIdnaEncode>;
using FunctionIdnaDecode = FunctionStringToString<IdnaDecode, NameIdnaDecode>;
REGISTER_FUNCTION(Idna)
{
factory.registerFunction<FunctionIdnaEncode>(FunctionDocumentation{
.description=R"(
Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
.syntax="idnaEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncode('straße.münchen.de') AS ascii;",
R"(
ascii
xn--strae-oqa.xn--mnchen-3ya.de
)"
}}
});
factory.registerFunction<FunctionTryIdnaEncode>(FunctionDocumentation{
.description=R"(
Computes a ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error)",
.syntax="punycodeEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncodeOrNull('München') AS ascii;",
R"(
ascii
xn--strae-oqa.xn--mnchen-3ya.de
)"
}}
});
factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
.description=R"(
Computes the Unicode representation of ASCII-encoded Internationalized Domain Name.)",
.syntax="idnaDecode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
R"(
unicode
straße.münchen.de
)"
}}
});
}
}
#endif

206
src/Functions/punycode.cpp Normal file
View File

@ -0,0 +1,206 @@
#include "config.h"
#if USE_IDNA
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
#endif
# include <ada/idna/punycode.h>
# include <ada/idna/unicode_transcoding.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
/// Implementation of
/// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode
enum class ErrorHandling
{
Throw, /// Throw exception
Empty /// Return empty string
};
struct PunycodeEncode
{
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_puny;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
value_utf32.resize(value_utf32_length);
const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
if (codepoints == 0)
value_utf32.clear(); /// input was empty or no valid UTF-8
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
if (!ok)
value_puny.clear();
res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear();
value_puny.clear(); /// utf32_to_punycode() appends to its output string
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
template <ErrorHandling error_handling>
struct PunycodeDecode
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_utf8;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const std::string_view value_punycode(value, value_length);
const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
if (!ok)
{
if constexpr (error_handling == ErrorHandling::Throw)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' is not a valid Punycode-encoded string", value_punycode);
}
else
{
static_assert(error_handling == ErrorHandling::Empty);
value_utf32.clear();
}
}
const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
value_utf8.resize(utf8_length);
ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear(); /// punycode_to_utf32() appends to its output string
value_utf8.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
struct NamePunycodeEncode { static constexpr auto name = "punycodeEncode"; };
struct NamePunycodeDecode { static constexpr auto name = "punycodeDecode"; };
struct NameTryPunycodeDecode { static constexpr auto name = "tryPunycodeDecode"; };
using FunctionPunycodeEncode = FunctionStringToString<PunycodeEncode, NamePunycodeEncode>;
using FunctionPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Throw>, NamePunycodeDecode>;
using FunctionTryPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Empty>, NameTryPunycodeDecode>;
REGISTER_FUNCTION(Punycode)
{
factory.registerFunction<FunctionPunycodeEncode>(FunctionDocumentation{
.description=R"(
Computes a Punycode representation of a string.)",
.syntax="punycodeEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeEncode('München') AS puny;",
R"(
puny
Mnchen-3ya
)"
}}
});
factory.registerFunction<FunctionPunycodeDecode>(FunctionDocumentation{
.description=R"(
Computes a Punycode representation of a string. Throws an exception if the input is not valid Punycode.)",
.syntax="punycodeDecode(str)",
.arguments={{"str", "A Punycode-encoded string"}},
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeDecode('Mnchen-3ya') AS plain;",
R"(
plain
München
)"
}}
});
factory.registerFunction<FunctionTryPunycodeDecode>(FunctionDocumentation{
.description=R"(
Computes a Punycode representation of a string. Returns an empty string if the input is not valid Punycode.)",
.syntax="punycodeDecode(str)",
.arguments={{"str", "A Punycode-encoded string"}},
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT tryPunycodeDecode('Mnchen-3ya') AS plain;",
R"(
plain
München
)"
}}
});
}
}
#endif

View File

@ -129,6 +129,9 @@ endif()
if (TARGET ch_contrib::sqids) if (TARGET ch_contrib::sqids)
set(USE_SQIDS 1) set(USE_SQIDS 1)
endif() endif()
if (TARGET ch_contrib::idna)
set(USE_IDNA 1)
endif()
if (TARGET ch_contrib::vectorscan) if (TARGET ch_contrib::vectorscan)
set(USE_VECTORSCAN 1) set(USE_VECTORSCAN 1)
endif() endif()

View File

@ -0,0 +1,88 @@
-- Negative tests
-- Regular cases
straße.de xn--strae-oqa.de xn--strae-oqa.de straße.de straße.de
2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888
AMAZON amazon amazon amazon amazon
aa-- aa-- aa-- aa-- aa--
a†-- xn--a---kp0a xn--a---kp0a a†-- a†--
ab--c ab--c ab--c ab--c ab--c
-† xn----xhn xn----xhn -† -†
-x.xn--zca -x.xn--zca -x.xn--zca -x.ß -x.ß
x-.xn--zca x-.xn--zca x-.xn--zca x-.ß x-.ß
x-.ß x-.xn--zca x-.xn--zca x-.ß x-.ß
x..ß x..xn--zca x..xn--zca x..ß x..ß
128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1
xn--zca.xn--zca xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß
xn--zca.ß xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß
x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x
x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß
x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß
01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x
≠ xn--1ch xn--1ch ≠ ≠
aa-- aa-- aa-- aa--
ab--c ab--c ab--c ab--c
-x -x -x -x
xn--1ch ≠ xn--1ch xn--1ch
xn--dqd20apc ᄎᆞᆷ xn--dqd20apc xn--dqd20apc
xn--gdh ≮ xn--gdh xn--gdh
xn--80aaa0ahbbeh4c йайзаакпий xn--80aaa0ahbbeh4c xn--80aaa0ahbbeh4c
xn--3bs854c 团淄 xn--3bs854c xn--3bs854c
xn--mgb9awbf عمان xn--mgb9awbf xn--mgb9awbf
xn--mgbaam7a8h امارات xn--mgbaam7a8h xn--mgbaam7a8h
xn--mgbbh1a71e بھارت xn--mgbbh1a71e xn--mgbbh1a71e
xn--s7y.com 短.com xn--s7y.com xn--s7y.com
xn--55qx5d.xn--tckwe 公司.コム xn--55qx5d.xn--tckwe xn--55qx5d.xn--tckwe
xn--4dbrk0ce ישראל xn--4dbrk0ce xn--4dbrk0ce
xn--zckzah テスト xn--zckzah xn--zckzah
xn--p1ai.com рф.com xn--p1ai.com xn--p1ai.com
xn--mxahbxey0c.gr εχαμπλε.gr xn--mxahbxey0c.gr xn--mxahbxey0c.gr
xn--h2brj9c भारत xn--h2brj9c xn--h2brj9c
xn--d1acpjx3f.xn--p1ai яндекс.рф xn--d1acpjx3f.xn--p1ai xn--d1acpjx3f.xn--p1ai
xn--q9jyb4c みんな xn--q9jyb4c xn--q9jyb4c
xn--sterreich-z7a.at österreich.at xn--sterreich-z7a.at xn--sterreich-z7a.at
xn--h2breg3eve.xn--h2brj9c भारतम्.भारत xn--h2breg3eve.xn--h2brj9c xn--h2breg3eve.xn--h2brj9c
ejemplo.xn--q9jyb4c ejemplo.みんな ejemplo.xn--q9jyb4c ejemplo.xn--q9jyb4c
xn--9t4b11yi5a.com 테스트.com xn--9t4b11yi5a.com xn--9t4b11yi5a.com
xn--gk3at1e.com 通販.com xn--gk3at1e.com xn--gk3at1e.com
xn--42c2d9a คอม xn--42c2d9a xn--42c2d9a
1xn-- 1xn-- 1xn-- 1xn--
xn--bih.com ⌘.com xn--bih.com xn--bih.com
xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c موقع.وزارة-الأتصالات.مصر xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c
xn--mgbb9fbpob موبايلي xn--mgbb9fbpob xn--mgbb9fbpob
xn--55qw42g.xn--55qw42g 公益.公益 xn--55qw42g.xn--55qw42g xn--55qw42g.xn--55qw42g
≠ ≠ xn--1ch xn--1ch
ファッション.biz ファッション.biz xn--bck1b9a5dre4c.biz xn--bck1b9a5dre4c.biz
-- Special cases
---- Empty input
---- NULL input
\N
\N
\N
---- Garbage inputs for idnaEncode
---- Long input
Row 1:
──────
idna: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
ascii: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
ascii_try: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
original: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
original_try: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
---- Non-const input
münchen xn--mnchen-3ya xn--mnchen-3ya münchen münchen
straße.münchen.de xn--strae-oqa.xn--mnchen-3ya.de xn--strae-oqa.xn--mnchen-3ya.de straße.münchen.de straße.münchen.de
---- Non-const input with invalid values sprinkled in
london.co.uk london.co.uk london.co.uk
microsoft.com microsoft.com microsoft.com
xn--
xn--
xn--tešla
ytraße.münchen.de xn--ytrae-oqa.xn--mnchen-3ya.de ytraße.münchen.de

View File

@ -0,0 +1,124 @@
-- Tags: no-fasttest
-- no-fasttest: requires idna library
-- See also 02932_punycode.sql
SELECT '-- Negative tests';
SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tryIdnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT tryIdnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tryIdnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT idnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT tryIdnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT idnaDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT '-- Regular cases';
-- The test cases originate from the ada idna unit tests:
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt
--
SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '-†' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT '' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
--
SELECT '-- Special cases';
SELECT '---- Empty input';
SELECT idnaEncode('');
SELECT tryIdnaEncode('');
SELECT idnaDecode('');
SELECT '---- NULL input';
SELECT idnaEncode(NULL);
SELECT tryIdnaEncode(NULL);
SELECT idnaDecode(NULL);
SELECT '---- Garbage inputs for idnaEncode';
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt
SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS }
SELECT tryIdnaEncode('xn--');
SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS }
SELECT tryIdnaEncode('ﻱa');
SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS }
SELECT tryIdnaEncode('xn--a-yoc');
SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS }
SELECT tryIdnaEncode('xn--tešla');
SELECT '---- Long input';
SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FORMAT Vertical;
SELECT '---- Non-const input';
DROP TABLE IF EXISTS tab;
CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen');
SELECT idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FROM tab;
DROP TABLE tab;
SELECT '---- Non-const input with invalid values sprinkled in';
DROP TABLE IF EXISTS tab;
CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('ytraße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--');
SELECT idna, idnaEncode(idna) AS ascii FROM tab; -- { serverError BAD_ARGUMENTS }
SELECT idna, tryIdnaEncode(idna) AS ascii, idnaDecode(ascii) AS original FROM tab;
DROP TABLE tab;

View File

@ -0,0 +1,55 @@
-- Negative tests
-- Regular cases
a a- a a
A A- A A
-- --- -- --
London London- London London
Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson Lloyd-Atkinson
This has spaces This has spaces- This has spaces This has spaces
-> $1.00 <- -> $1.00 <-- -> $1.00 <- -> $1.00 <-
а 80a а а
ü tda ü ü
α mxa α α
例 fsq 例 例
😉 n28h 😉 😉
αβγ mxacd αβγ αβγ
München Mnchen-3ya München München
Mnchen-3ya Mnchen-3ya- Mnchen-3ya Mnchen-3ya
München-Ost Mnchen-Ost-9db München-Ost München-Ost
Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost Bahnhof München-Ost
abæcdöef abcdef-qua4k abæcdöef abæcdöef
правда 80aafi6cg правда правда
ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข ยจฆฟคฏข
ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 ドメイン名例
MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 MajiでKoiする5秒前
「bücher」 bcher-kva8445foa 「bücher」 「bücher」
团淄 3bs854c 团淄 团淄
-- Special cases
---- Empty input
---- NULL input
\N
\N
\N
---- Garbage Punycode-encoded input
---- Long input
Row 1:
──────
str: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
puny: Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
original: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
original_try: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
---- Non-const values
München Mnchen-3ya München München
abc abc- abc abc
aäoöuü aou-qla5gqb aäoöuü aäoöuü
---- Non-const values with invalid values sprinkled in
Also no punycode
London- London
Mnchen-3ya München
No punycode
Rtting-3ya Rütting
XYZ no punycode

View File

@ -0,0 +1,86 @@
-- Tags: no-fasttest
-- no-fasttest: requires idna library
-- See also 02932_idna.sql
SELECT '-- Negative tests';
SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tryPunycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT tryPunycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT tryPunycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT tryPunycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
SELECT '-- Regular cases';
-- The test cases originate from the ada idna unit tests:
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
--
SELECT '-- Special cases';
SELECT '---- Empty input';
SELECT punycodeEncode('');
SELECT punycodeDecode('');
SELECT tryPunycodeDecode('');
SELECT '---- NULL input';
SELECT punycodeEncode(NULL);
SELECT punycodeDecode(NULL);
SELECT tryPunycodeDecode(NULL);
SELECT '---- Garbage Punycode-encoded input';
SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
SELECT tryPunycodeDecode('no punycode');
SELECT '---- Long input';
SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FORMAT Vertical;
SELECT '---- Non-const values';
DROP TABLE IF EXISTS tab;
CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FROM tab;
DROP TABLE tab;
SELECT '---- Non-const values with invalid values sprinkled in';
DROP TABLE IF EXISTS tab;
CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny;
INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode');
SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
SELECT puny, tryPunycodeDecode(puny) AS original FROM tab;
DROP TABLE tab;

View File

@ -344,6 +344,7 @@ Hypot
IANA IANA
IDE IDE
IDEs IDEs
IDNA
IMDS IMDS
INFILE INFILE
INSERTed INSERTed
@ -701,8 +702,6 @@ PrettySpaceMonoBlock
PrettySpaceNoEscapes PrettySpaceNoEscapes
PrettySpaceNoEscapesMonoBlock PrettySpaceNoEscapesMonoBlock
Prewhere Prewhere
TotalPrimaryKeyBytesInMemory
TotalPrimaryKeyBytesInMemoryAllocated
PrivateKeyPassphraseHandler PrivateKeyPassphraseHandler
ProfileEvents ProfileEvents
Profiler Profiler
@ -714,6 +713,7 @@ Promtail
Protobuf Protobuf
ProtobufSingle ProtobufSingle
ProxySQL ProxySQL
Punycode
PyArrow PyArrow
PyCharm PyCharm
QEMU QEMU
@ -912,6 +912,7 @@ ThreadsInOvercommitTracker
Timeunit Timeunit
TinyLog TinyLog
Tkachenko Tkachenko
ToASCII
ToCenterChild ToCenterChild
ToChildren ToChildren
ToGeo ToGeo
@ -920,10 +921,13 @@ ToIPv
ToParent ToParent
ToSnowflake ToSnowflake
ToString ToString
ToUnicode
Toolset Toolset
TopK TopK
TotalBytesOfMergeTreeTables TotalBytesOfMergeTreeTables
TotalPartsOfMergeTreeTables TotalPartsOfMergeTreeTables
TotalPrimaryKeyBytesInMemory
TotalPrimaryKeyBytesInMemoryAllocated
TotalRowsOfMergeTreeTables TotalRowsOfMergeTreeTables
TotalTemporaryFiles TotalTemporaryFiles
Tradeoff Tradeoff
@ -1651,6 +1655,8 @@ hyvor
icosahedron icosahedron
icudata icudata
idempotency idempotency
idnaDecode
idnaEncode
ifNotFinite ifNotFinite
ifNull ifNull
iframe iframe
@ -1848,14 +1854,14 @@ metrica
metroHash metroHash
mfedotov mfedotov
minMap minMap
minSampleSizeContinuous
minSampleSizeConversion
mindsdb mindsdb
minimalistic minimalistic
mininum mininum
miniselect miniselect
minmap minmap
minmax minmax
minSampleSizeContinuous
minSampleSizeConversion
mins mins
misconfiguration misconfiguration
mispredictions mispredictions
@ -2075,6 +2081,8 @@ pseudorandom
pseudorandomize pseudorandomize
psql psql
ptrs ptrs
punycodeDecode
punycodeEncode
pushdown pushdown
pwrite pwrite
py py
@ -2521,6 +2529,8 @@ trimRight
trunc trunc
tryBase tryBase
tryDecrypt tryDecrypt
tryIdnaEncode
tryPunycodeDecode
tskv tskv
tsv tsv
tui tui