mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #58454 from rschu1ze/punycode-revert-revert
Implement IDNA and punycode encoding/decoding (2nd attempt)
This commit is contained in:
commit
fb4fd29110
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -360,3 +360,6 @@
|
||||
[submodule "contrib/sqids-cpp"]
|
||||
path = contrib/sqids-cpp
|
||||
url = https://github.com/sqids/sqids-cpp.git
|
||||
[submodule "contrib/idna"]
|
||||
path = contrib/idna
|
||||
url = https://github.com/ada-url/idna.git
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
|
||||
add_contrib (libpq-cmake libpq)
|
||||
add_contrib (nuraft-cmake NuRaft)
|
||||
add_contrib (fast_float-cmake fast_float)
|
||||
add_contrib (idna-cmake idna)
|
||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||
add_contrib (incbin-cmake incbin)
|
||||
add_contrib (sqids-cpp-cmake sqids-cpp)
|
||||
|
1
contrib/idna
vendored
Submodule
1
contrib/idna
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667
|
24
contrib/idna-cmake/CMakeLists.txt
Normal file
24
contrib/idna-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,24 @@
|
||||
option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
|
||||
if ((NOT ENABLE_IDNA))
|
||||
message (STATUS "Not using idna")
|
||||
return()
|
||||
endif()
|
||||
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
|
||||
|
||||
set (SRCS
|
||||
"${LIBRARY_DIR}/src/idna.cpp"
|
||||
"${LIBRARY_DIR}/src/mapping.cpp"
|
||||
"${LIBRARY_DIR}/src/mapping_tables.cpp"
|
||||
"${LIBRARY_DIR}/src/normalization.cpp"
|
||||
"${LIBRARY_DIR}/src/normalization_tables.cpp"
|
||||
"${LIBRARY_DIR}/src/punycode.cpp"
|
||||
"${LIBRARY_DIR}/src/to_ascii.cpp"
|
||||
"${LIBRARY_DIR}/src/to_unicode.cpp"
|
||||
"${LIBRARY_DIR}/src/unicode_transcoding.cpp"
|
||||
"${LIBRARY_DIR}/src/validity.cpp"
|
||||
)
|
||||
|
||||
add_library (_idna ${SRCS})
|
||||
target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
|
||||
|
||||
add_library (ch_contrib::idna ALIAS _idna)
|
@ -1383,6 +1383,148 @@ Result:
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## punycodeEncode
|
||||
|
||||
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string.
|
||||
The string must be UTF8-encoded, otherwise the behavior is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
punycodeEncode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A Punycode representation of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select punycodeEncode('München');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─punycodeEncode('München')─┐
|
||||
│ Mnchen-3ya │
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## punycodeDecode
|
||||
|
||||
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
|
||||
If no valid Punycode-encoded string is given, an exception is thrown.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
punycodeEncode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Punycode-encoded string. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The plaintext of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select punycodeDecode('Mnchen-3ya');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─punycodeDecode('Mnchen-3ya')─┐
|
||||
│ München │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## tryPunycodeDecode
|
||||
|
||||
Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded string is given.
|
||||
|
||||
## idnaEncode
|
||||
|
||||
Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
|
||||
The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown.
|
||||
Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
idnaEncode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A ASCII representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select idnaEncode('straße.münchen.de');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─idnaEncode('straße.münchen.de')─────┐
|
||||
│ xn--strae-oqa.xn--mnchen-3ya.de │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## tryIdnaEncode
|
||||
|
||||
Like `idnaEncode` but returns an empty string in case of an error instead of throwing an exception.
|
||||
|
||||
## idnaDecode
|
||||
|
||||
Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
|
||||
In case of an error (e.g. because the input is invalid), the input string is returned.
|
||||
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
idnaDecode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A Unicode (UTF-8) representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de')─┐
|
||||
│ straße.münchen.de │
|
||||
└───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## byteHammingDistance
|
||||
|
||||
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
|
||||
|
@ -28,6 +28,7 @@
|
||||
#cmakedefine01 USE_S2_GEOMETRY
|
||||
#cmakedefine01 USE_FASTOPS
|
||||
#cmakedefine01 USE_SQIDS
|
||||
#cmakedefine01 USE_IDNA
|
||||
#cmakedefine01 USE_NLP
|
||||
#cmakedefine01 USE_VECTORSCAN
|
||||
#cmakedefine01 USE_LIBURING
|
||||
|
@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::sqids)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::idna)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::idna)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::h3)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::h3)
|
||||
endif()
|
||||
|
202
src/Functions/idna.cpp
Normal file
202
src/Functions/idna.cpp
Normal file
@ -0,0 +1,202 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_IDNA
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
# include <ada/idna/to_ascii.h>
|
||||
# include <ada/idna/to_unicode.h>
|
||||
# include <ada/idna/unicode_transcoding.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Implementation of
|
||||
/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
|
||||
/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode
|
||||
|
||||
enum class ErrorHandling
|
||||
{
|
||||
Throw, /// Throw exception
|
||||
Empty /// Return empty string
|
||||
};
|
||||
|
||||
|
||||
/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
|
||||
/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
|
||||
/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
|
||||
/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs.
|
||||
template <ErrorHandling error_handling>
|
||||
struct IdnaEncode
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::string ascii;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t value_length = offsets[row] - prev_offset - 1;
|
||||
|
||||
std::string_view value_view(value, value_length);
|
||||
if (!value_view.empty()) /// to_ascii() expects non-empty input
|
||||
{
|
||||
ascii = ada::idna::to_ascii(value_view);
|
||||
const bool ok = !ascii.empty();
|
||||
if (!ok)
|
||||
{
|
||||
if constexpr (error_handling == ErrorHandling::Throw)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(error_handling == ErrorHandling::Empty);
|
||||
ascii.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
ascii.clear();
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
|
||||
}
|
||||
};
|
||||
|
||||
/// Translates an ASII-encoded IDNA string back to its UTF-8 representation.
|
||||
struct IdnaDecode
|
||||
{
|
||||
/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::string unicode;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t ascii_length = offsets[row] - prev_offset - 1;
|
||||
std::string_view ascii_view(ascii, ascii_length);
|
||||
|
||||
unicode = ada::idna::to_unicode(ascii_view);
|
||||
|
||||
res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
unicode.clear();
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
|
||||
}
|
||||
};
|
||||
|
||||
struct NameIdnaEncode { static constexpr auto name = "idnaEncode"; };
|
||||
struct NameTryIdnaEncode { static constexpr auto name = "tryIdnaEncode"; };
|
||||
struct NameIdnaDecode { static constexpr auto name = "idnaDecode"; };
|
||||
|
||||
using FunctionIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Throw>, NameIdnaEncode>;
|
||||
using FunctionTryIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Empty>, NameTryIdnaEncode>;
|
||||
using FunctionIdnaDecode = FunctionStringToString<IdnaDecode, NameIdnaDecode>;
|
||||
|
||||
REGISTER_FUNCTION(Idna)
|
||||
{
|
||||
factory.registerFunction<FunctionIdnaEncode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
|
||||
.syntax="idnaEncode(str)",
|
||||
.arguments={{"str", "Input string"}},
|
||||
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT idnaEncode('straße.münchen.de') AS ascii;",
|
||||
R"(
|
||||
┌─ascii───────────────────────────┐
|
||||
│ xn--strae-oqa.xn--mnchen-3ya.de │
|
||||
└─────────────────────────────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
|
||||
factory.registerFunction<FunctionTryIdnaEncode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes a ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error)",
|
||||
.syntax="punycodeEncode(str)",
|
||||
.arguments={{"str", "Input string"}},
|
||||
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT idnaEncodeOrNull('München') AS ascii;",
|
||||
R"(
|
||||
┌─ascii───────────────────────────┐
|
||||
│ xn--strae-oqa.xn--mnchen-3ya.de │
|
||||
└─────────────────────────────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
|
||||
factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes the Unicode representation of ASCII-encoded Internationalized Domain Name.)",
|
||||
.syntax="idnaDecode(str)",
|
||||
.arguments={{"str", "Input string"}},
|
||||
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
|
||||
R"(
|
||||
┌─unicode───────────┐
|
||||
│ straße.münchen.de │
|
||||
└───────────────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
206
src/Functions/punycode.cpp
Normal file
206
src/Functions/punycode.cpp
Normal file
@ -0,0 +1,206 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_IDNA
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
# include <ada/idna/punycode.h>
|
||||
# include <ada/idna/unicode_transcoding.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Implementation of
|
||||
/// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode
|
||||
|
||||
enum class ErrorHandling
|
||||
{
|
||||
Throw, /// Throw exception
|
||||
Empty /// Return empty string
|
||||
};
|
||||
|
||||
|
||||
struct PunycodeEncode
|
||||
{
|
||||
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::u32string value_utf32;
|
||||
std::string value_puny;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t value_length = offsets[row] - prev_offset - 1;
|
||||
|
||||
const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
|
||||
value_utf32.resize(value_utf32_length);
|
||||
const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
|
||||
if (codepoints == 0)
|
||||
value_utf32.clear(); /// input was empty or no valid UTF-8
|
||||
|
||||
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
|
||||
if (!ok)
|
||||
value_puny.clear();
|
||||
|
||||
res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
value_utf32.clear();
|
||||
value_puny.clear(); /// utf32_to_punycode() appends to its output string
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <ErrorHandling error_handling>
|
||||
struct PunycodeDecode
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::u32string value_utf32;
|
||||
std::string value_utf8;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t value_length = offsets[row] - prev_offset - 1;
|
||||
|
||||
const std::string_view value_punycode(value, value_length);
|
||||
const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
|
||||
if (!ok)
|
||||
{
|
||||
if constexpr (error_handling == ErrorHandling::Throw)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' is not a valid Punycode-encoded string", value_punycode);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(error_handling == ErrorHandling::Empty);
|
||||
value_utf32.clear();
|
||||
}
|
||||
}
|
||||
|
||||
const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
|
||||
value_utf8.resize(utf8_length);
|
||||
ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
|
||||
|
||||
res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
value_utf32.clear(); /// punycode_to_utf32() appends to its output string
|
||||
value_utf8.clear();
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
|
||||
}
|
||||
};
|
||||
|
||||
struct NamePunycodeEncode { static constexpr auto name = "punycodeEncode"; };
|
||||
struct NamePunycodeDecode { static constexpr auto name = "punycodeDecode"; };
|
||||
struct NameTryPunycodeDecode { static constexpr auto name = "tryPunycodeDecode"; };
|
||||
|
||||
using FunctionPunycodeEncode = FunctionStringToString<PunycodeEncode, NamePunycodeEncode>;
|
||||
using FunctionPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Throw>, NamePunycodeDecode>;
|
||||
using FunctionTryPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Empty>, NameTryPunycodeDecode>;
|
||||
|
||||
REGISTER_FUNCTION(Punycode)
|
||||
{
|
||||
factory.registerFunction<FunctionPunycodeEncode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes a Punycode representation of a string.)",
|
||||
.syntax="punycodeEncode(str)",
|
||||
.arguments={{"str", "Input string"}},
|
||||
.returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT punycodeEncode('München') AS puny;",
|
||||
R"(
|
||||
┌─puny───────┐
|
||||
│ Mnchen-3ya │
|
||||
└────────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
|
||||
factory.registerFunction<FunctionPunycodeDecode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes a Punycode representation of a string. Throws an exception if the input is not valid Punycode.)",
|
||||
.syntax="punycodeDecode(str)",
|
||||
.arguments={{"str", "A Punycode-encoded string"}},
|
||||
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT punycodeDecode('Mnchen-3ya') AS plain;",
|
||||
R"(
|
||||
┌─plain───┐
|
||||
│ München │
|
||||
└─────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
|
||||
factory.registerFunction<FunctionTryPunycodeDecode>(FunctionDocumentation{
|
||||
.description=R"(
|
||||
Computes a Punycode representation of a string. Returns an empty string if the input is not valid Punycode.)",
|
||||
.syntax="punycodeDecode(str)",
|
||||
.arguments={{"str", "A Punycode-encoded string"}},
|
||||
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
|
||||
.examples={
|
||||
{"simple",
|
||||
"SELECT tryPunycodeDecode('Mnchen-3ya') AS plain;",
|
||||
R"(
|
||||
┌─plain───┐
|
||||
│ München │
|
||||
└─────────┘
|
||||
)"
|
||||
}}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -129,6 +129,9 @@ endif()
|
||||
if (TARGET ch_contrib::sqids)
|
||||
set(USE_SQIDS 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::idna)
|
||||
set(USE_IDNA 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::vectorscan)
|
||||
set(USE_VECTORSCAN 1)
|
||||
endif()
|
||||
|
88
tests/queries/0_stateless/02932_idna.reference
Normal file
88
tests/queries/0_stateless/02932_idna.reference
Normal file
@ -0,0 +1,88 @@
|
||||
-- Negative tests
|
||||
-- Regular cases
|
||||
straße.de xn--strae-oqa.de xn--strae-oqa.de straße.de straße.de
|
||||
2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888
|
||||
AMAZON amazon amazon amazon amazon
|
||||
aa-- aa-- aa-- aa-- aa--
|
||||
a†-- xn--a---kp0a xn--a---kp0a a†-- a†--
|
||||
ab--c ab--c ab--c ab--c ab--c
|
||||
-† xn----xhn xn----xhn -† -†
|
||||
-x.xn--zca -x.xn--zca -x.xn--zca -x.ß -x.ß
|
||||
x-.xn--zca x-.xn--zca x-.xn--zca x-.ß x-.ß
|
||||
x-.ß x-.xn--zca x-.xn--zca x-.ß x-.ß
|
||||
x..ß x..xn--zca x..xn--zca x..ß x..ß
|
||||
128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1
|
||||
xn--zca.xn--zca xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß
|
||||
xn--zca.ß xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß
|
||||
x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x
|
||||
x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß
|
||||
x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß
|
||||
01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x
|
||||
≠ xn--1ch xn--1ch ≠ ≠
|
||||
aa-- aa-- aa-- aa--
|
||||
ab--c ab--c ab--c ab--c
|
||||
-x -x -x -x
|
||||
|
||||
xn--1ch ≠ xn--1ch xn--1ch
|
||||
xn--dqd20apc ᄎᆞᆷ xn--dqd20apc xn--dqd20apc
|
||||
xn--gdh ≮ xn--gdh xn--gdh
|
||||
xn--80aaa0ahbbeh4c йайзаакпий xn--80aaa0ahbbeh4c xn--80aaa0ahbbeh4c
|
||||
xn--3bs854c 团淄 xn--3bs854c xn--3bs854c
|
||||
xn--mgb9awbf عمان xn--mgb9awbf xn--mgb9awbf
|
||||
xn--mgbaam7a8h امارات xn--mgbaam7a8h xn--mgbaam7a8h
|
||||
xn--mgbbh1a71e بھارت xn--mgbbh1a71e xn--mgbbh1a71e
|
||||
xn--s7y.com 短.com xn--s7y.com xn--s7y.com
|
||||
xn--55qx5d.xn--tckwe 公司.コム xn--55qx5d.xn--tckwe xn--55qx5d.xn--tckwe
|
||||
xn--4dbrk0ce ישראל xn--4dbrk0ce xn--4dbrk0ce
|
||||
xn--zckzah テスト xn--zckzah xn--zckzah
|
||||
xn--p1ai.com рф.com xn--p1ai.com xn--p1ai.com
|
||||
xn--mxahbxey0c.gr εχαμπλε.gr xn--mxahbxey0c.gr xn--mxahbxey0c.gr
|
||||
xn--h2brj9c भारत xn--h2brj9c xn--h2brj9c
|
||||
xn--d1acpjx3f.xn--p1ai яндекс.рф xn--d1acpjx3f.xn--p1ai xn--d1acpjx3f.xn--p1ai
|
||||
xn--q9jyb4c みんな xn--q9jyb4c xn--q9jyb4c
|
||||
xn--sterreich-z7a.at österreich.at xn--sterreich-z7a.at xn--sterreich-z7a.at
|
||||
xn--h2breg3eve.xn--h2brj9c भारतम्.भारत xn--h2breg3eve.xn--h2brj9c xn--h2breg3eve.xn--h2brj9c
|
||||
ejemplo.xn--q9jyb4c ejemplo.みんな ejemplo.xn--q9jyb4c ejemplo.xn--q9jyb4c
|
||||
xn--9t4b11yi5a.com 테스트.com xn--9t4b11yi5a.com xn--9t4b11yi5a.com
|
||||
xn--gk3at1e.com 通販.com xn--gk3at1e.com xn--gk3at1e.com
|
||||
xn--42c2d9a คอม xn--42c2d9a xn--42c2d9a
|
||||
1xn-- 1xn-- 1xn-- 1xn--
|
||||
xn--bih.com ⌘.com xn--bih.com xn--bih.com
|
||||
xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c موقع.وزارة-الأتصالات.مصر xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c
|
||||
xn--mgbb9fbpob موبايلي xn--mgbb9fbpob xn--mgbb9fbpob
|
||||
xn--55qw42g.xn--55qw42g 公益.公益 xn--55qw42g.xn--55qw42g xn--55qw42g.xn--55qw42g
|
||||
≠ ≠ xn--1ch xn--1ch
|
||||
ファッション.biz ファッション.biz xn--bck1b9a5dre4c.biz xn--bck1b9a5dre4c.biz
|
||||
-- Special cases
|
||||
---- Empty input
|
||||
|
||||
|
||||
|
||||
---- NULL input
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
---- Garbage inputs for idnaEncode
|
||||
|
||||
|
||||
|
||||
|
||||
---- Long input
|
||||
Row 1:
|
||||
──────
|
||||
idna: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
|
||||
ascii: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
|
||||
ascii_try: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
|
||||
original: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
|
||||
original_try: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
|
||||
---- Non-const input
|
||||
|
||||
münchen xn--mnchen-3ya xn--mnchen-3ya münchen münchen
|
||||
straße.münchen.de xn--strae-oqa.xn--mnchen-3ya.de xn--strae-oqa.xn--mnchen-3ya.de straße.münchen.de straße.münchen.de
|
||||
---- Non-const input with invalid values sprinkled in
|
||||
london.co.uk london.co.uk london.co.uk
|
||||
microsoft.com microsoft.com microsoft.com
|
||||
xn--
|
||||
xn--
|
||||
xn--tešla
|
||||
ytraße.münchen.de xn--ytrae-oqa.xn--mnchen-3ya.de ytraße.münchen.de
|
124
tests/queries/0_stateless/02932_idna.sql
Normal file
124
tests/queries/0_stateless/02932_idna.sql
Normal file
@ -0,0 +1,124 @@
|
||||
-- Tags: no-fasttest
|
||||
-- no-fasttest: requires idna library
|
||||
|
||||
-- See also 02932_punycode.sql
|
||||
|
||||
SELECT '-- Negative tests';
|
||||
|
||||
SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT tryIdnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
|
||||
SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT tryIdnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
|
||||
SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT tryIdnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
|
||||
SELECT idnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT tryIdnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT idnaDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
|
||||
SELECT '-- Regular cases';
|
||||
|
||||
-- The test cases originate from the ada idna unit tests:
|
||||
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt
|
||||
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt
|
||||
--
|
||||
SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '-†' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
SELECT '≠' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
|
||||
|
||||
SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
|
||||
--
|
||||
SELECT '-- Special cases';
|
||||
|
||||
SELECT '---- Empty input';
|
||||
SELECT idnaEncode('');
|
||||
SELECT tryIdnaEncode('');
|
||||
SELECT idnaDecode('');
|
||||
|
||||
SELECT '---- NULL input';
|
||||
SELECT idnaEncode(NULL);
|
||||
SELECT tryIdnaEncode(NULL);
|
||||
SELECT idnaDecode(NULL);
|
||||
|
||||
SELECT '---- Garbage inputs for idnaEncode';
|
||||
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt
|
||||
SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT tryIdnaEncode('xn--');
|
||||
SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT tryIdnaEncode('ﻱa');
|
||||
SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT tryIdnaEncode('xn--a-yoc');
|
||||
SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT tryIdnaEncode('xn--tešla');
|
||||
|
||||
SELECT '---- Long input';
|
||||
SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FORMAT Vertical;
|
||||
|
||||
SELECT '---- Non-const input';
|
||||
DROP TABLE IF EXISTS tab;
|
||||
CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
|
||||
INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen');
|
||||
SELECT idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FROM tab;
|
||||
DROP TABLE tab;
|
||||
|
||||
SELECT '---- Non-const input with invalid values sprinkled in';
|
||||
DROP TABLE IF EXISTS tab;
|
||||
CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
|
||||
INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('ytraße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--');
|
||||
SELECT idna, idnaEncode(idna) AS ascii FROM tab; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT idna, tryIdnaEncode(idna) AS ascii, idnaDecode(ascii) AS original FROM tab;
|
||||
DROP TABLE tab;
|
55
tests/queries/0_stateless/02932_punycode.reference
Normal file
55
tests/queries/0_stateless/02932_punycode.reference
Normal file
@ -0,0 +1,55 @@
|
||||
-- Negative tests
|
||||
-- Regular cases
|
||||
a a- a a
|
||||
A A- A A
|
||||
-- --- -- --
|
||||
London London- London London
|
||||
Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson Lloyd-Atkinson
|
||||
This has spaces This has spaces- This has spaces This has spaces
|
||||
-> $1.00 <- -> $1.00 <-- -> $1.00 <- -> $1.00 <-
|
||||
а 80a а а
|
||||
ü tda ü ü
|
||||
α mxa α α
|
||||
例 fsq 例 例
|
||||
😉 n28h 😉 😉
|
||||
αβγ mxacd αβγ αβγ
|
||||
München Mnchen-3ya München München
|
||||
Mnchen-3ya Mnchen-3ya- Mnchen-3ya Mnchen-3ya
|
||||
München-Ost Mnchen-Ost-9db München-Ost München-Ost
|
||||
Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost Bahnhof München-Ost
|
||||
abæcdöef abcdef-qua4k abæcdöef abæcdöef
|
||||
правда 80aafi6cg правда правда
|
||||
ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข ยจฆฟคฏข
|
||||
ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 ドメイン名例
|
||||
MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 MajiでKoiする5秒前
|
||||
「bücher」 bcher-kva8445foa 「bücher」 「bücher」
|
||||
团淄 3bs854c 团淄 团淄
|
||||
-- Special cases
|
||||
---- Empty input
|
||||
|
||||
|
||||
|
||||
---- NULL input
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
---- Garbage Punycode-encoded input
|
||||
|
||||
---- Long input
|
||||
Row 1:
|
||||
──────
|
||||
str: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
|
||||
puny: Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
|
||||
original: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
|
||||
original_try: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
|
||||
---- Non-const values
|
||||
München Mnchen-3ya München München
|
||||
abc abc- abc abc
|
||||
aäoöuü aou-qla5gqb aäoöuü aäoöuü
|
||||
---- Non-const values with invalid values sprinkled in
|
||||
Also no punycode
|
||||
London- London
|
||||
Mnchen-3ya München
|
||||
No punycode
|
||||
Rtting-3ya Rütting
|
||||
XYZ no punycode
|
86
tests/queries/0_stateless/02932_punycode.sql
Normal file
86
tests/queries/0_stateless/02932_punycode.sql
Normal file
@ -0,0 +1,86 @@
|
||||
-- Tags: no-fasttest
|
||||
-- no-fasttest: requires idna library
|
||||
|
||||
-- See also 02932_idna.sql
|
||||
|
||||
SELECT '-- Negative tests';
|
||||
|
||||
SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT tryPunycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
|
||||
SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT tryPunycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
|
||||
SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT tryPunycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
|
||||
SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT tryPunycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
|
||||
|
||||
SELECT '-- Regular cases';
|
||||
|
||||
-- The test cases originate from the ada idna unit tests:
|
||||
-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
|
||||
|
||||
SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
|
||||
--
|
||||
SELECT '-- Special cases';
|
||||
|
||||
SELECT '---- Empty input';
|
||||
SELECT punycodeEncode('');
|
||||
SELECT punycodeDecode('');
|
||||
SELECT tryPunycodeDecode('');
|
||||
|
||||
SELECT '---- NULL input';
|
||||
SELECT punycodeEncode(NULL);
|
||||
SELECT punycodeDecode(NULL);
|
||||
SELECT tryPunycodeDecode(NULL);
|
||||
|
||||
SELECT '---- Garbage Punycode-encoded input';
|
||||
SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT tryPunycodeDecode('no punycode');
|
||||
|
||||
SELECT '---- Long input';
|
||||
SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FORMAT Vertical;
|
||||
|
||||
SELECT '---- Non-const values';
|
||||
DROP TABLE IF EXISTS tab;
|
||||
CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
|
||||
INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
|
||||
SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FROM tab;
|
||||
DROP TABLE tab;
|
||||
|
||||
SELECT '---- Non-const values with invalid values sprinkled in';
|
||||
DROP TABLE IF EXISTS tab;
|
||||
CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny;
|
||||
INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode');
|
||||
SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT puny, tryPunycodeDecode(puny) AS original FROM tab;
|
||||
DROP TABLE tab;
|
@ -344,6 +344,7 @@ Hypot
|
||||
IANA
|
||||
IDE
|
||||
IDEs
|
||||
IDNA
|
||||
IMDS
|
||||
INFILE
|
||||
INSERTed
|
||||
@ -701,8 +702,6 @@ PrettySpaceMonoBlock
|
||||
PrettySpaceNoEscapes
|
||||
PrettySpaceNoEscapesMonoBlock
|
||||
Prewhere
|
||||
TotalPrimaryKeyBytesInMemory
|
||||
TotalPrimaryKeyBytesInMemoryAllocated
|
||||
PrivateKeyPassphraseHandler
|
||||
ProfileEvents
|
||||
Profiler
|
||||
@ -714,6 +713,7 @@ Promtail
|
||||
Protobuf
|
||||
ProtobufSingle
|
||||
ProxySQL
|
||||
Punycode
|
||||
PyArrow
|
||||
PyCharm
|
||||
QEMU
|
||||
@ -912,6 +912,7 @@ ThreadsInOvercommitTracker
|
||||
Timeunit
|
||||
TinyLog
|
||||
Tkachenko
|
||||
ToASCII
|
||||
ToCenterChild
|
||||
ToChildren
|
||||
ToGeo
|
||||
@ -920,10 +921,13 @@ ToIPv
|
||||
ToParent
|
||||
ToSnowflake
|
||||
ToString
|
||||
ToUnicode
|
||||
Toolset
|
||||
TopK
|
||||
TotalBytesOfMergeTreeTables
|
||||
TotalPartsOfMergeTreeTables
|
||||
TotalPrimaryKeyBytesInMemory
|
||||
TotalPrimaryKeyBytesInMemoryAllocated
|
||||
TotalRowsOfMergeTreeTables
|
||||
TotalTemporaryFiles
|
||||
Tradeoff
|
||||
@ -1651,6 +1655,8 @@ hyvor
|
||||
icosahedron
|
||||
icudata
|
||||
idempotency
|
||||
idnaDecode
|
||||
idnaEncode
|
||||
ifNotFinite
|
||||
ifNull
|
||||
iframe
|
||||
@ -1848,14 +1854,14 @@ metrica
|
||||
metroHash
|
||||
mfedotov
|
||||
minMap
|
||||
minSampleSizeContinuous
|
||||
minSampleSizeConversion
|
||||
mindsdb
|
||||
minimalistic
|
||||
mininum
|
||||
miniselect
|
||||
minmap
|
||||
minmax
|
||||
minSampleSizeContinuous
|
||||
minSampleSizeConversion
|
||||
mins
|
||||
misconfiguration
|
||||
mispredictions
|
||||
@ -2075,6 +2081,8 @@ pseudorandom
|
||||
pseudorandomize
|
||||
psql
|
||||
ptrs
|
||||
punycodeDecode
|
||||
punycodeEncode
|
||||
pushdown
|
||||
pwrite
|
||||
py
|
||||
@ -2521,6 +2529,8 @@ trimRight
|
||||
trunc
|
||||
tryBase
|
||||
tryDecrypt
|
||||
tryIdnaEncode
|
||||
tryPunycodeDecode
|
||||
tskv
|
||||
tsv
|
||||
tui
|
||||
|
Loading…
Reference in New Issue
Block a user