mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-09 17:14:47 +00:00
Revert "Revert "Implement punycode encoding/decoding""
This reverts commit 345d29a3c1
.
This commit is contained in:
parent
ce13b21d95
commit
2186aa8f21
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -360,3 +360,6 @@
|
||||
[submodule "contrib/sqids-cpp"]
|
||||
path = contrib/sqids-cpp
|
||||
url = https://github.com/sqids/sqids-cpp.git
|
||||
[submodule "contrib/idna"]
|
||||
path = contrib/idna
|
||||
url = https://github.com/ada-url/idna.git
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
|
||||
add_contrib (libpq-cmake libpq)
|
||||
add_contrib (nuraft-cmake NuRaft)
|
||||
add_contrib (fast_float-cmake fast_float)
|
||||
add_contrib (idna-cmake idna)
|
||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||
add_contrib (incbin-cmake incbin)
|
||||
add_contrib (sqids-cpp-cmake sqids-cpp)
|
||||
|
1
contrib/idna
vendored
Submodule
1
contrib/idna
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667
|
24
contrib/idna-cmake/CMakeLists.txt
Normal file
24
contrib/idna-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,24 @@
|
||||
# Builds the vendored idna sources from contrib/idna as a library and exposes it as ch_contrib::idna.
option (ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})

# Bail out early when the library is switched off; no target is defined in that case.
if (NOT ENABLE_IDNA)
    message (STATUS "Not using idna")
    return ()
endif ()

set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")

# Translation units of the library, all located in "${LIBRARY_DIR}/src".
set (SRCS "")
foreach (unit
    idna
    mapping
    mapping_tables
    normalization
    normalization_tables
    punycode
    to_ascii
    to_unicode
    unicode_transcoding
    validity)
    list (APPEND SRCS "${LIBRARY_DIR}/src/${unit}.cpp")
endforeach ()

add_library (_idna ${SRCS})
target_include_directories (_idna PUBLIC "${LIBRARY_DIR}/include")

# Alias under which other targets refer to the library.
add_library (ch_contrib::idna ALIAS _idna)
|
@ -1383,6 +1383,71 @@ Result:
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## punycodeEncode
|
||||
|
||||
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
|
||||
The string must be UTF8-encoded, otherwise results are undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
punycodeEncode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A Punycode representation of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select punycodeEncode('München');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─punycodeEncode('München')─┐
|
||||
│ Mnchen-3ya │
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## punycodeDecode
|
||||
|
||||
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
punycodeDecode(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Punycode-encoded string. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The plaintext of the input value. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
select punycodeDecode('Mnchen-3ya');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─punycodeDecode('Mnchen-3ya')─┐
|
||||
│ München │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## byteHammingDistance
|
||||
|
||||
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
|
||||
|
@ -28,6 +28,7 @@
|
||||
#cmakedefine01 USE_S2_GEOMETRY
|
||||
#cmakedefine01 USE_FASTOPS
|
||||
#cmakedefine01 USE_SQIDS
|
||||
#cmakedefine01 USE_IDNA
|
||||
#cmakedefine01 USE_NLP
|
||||
#cmakedefine01 USE_VECTORSCAN
|
||||
#cmakedefine01 USE_LIBURING
|
||||
|
@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::sqids)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::idna)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::idna)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::h3)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::h3)
|
||||
endif()
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include "config.h"
|
||||
|
||||
#ifdef ENABLE_SQIDS
|
||||
#if USE_SQIDS
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
165
src/Functions/punycode.cpp
Normal file
165
src/Functions/punycode.cpp
Normal file
@ -0,0 +1,165 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_IDNA
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
# include <ada/idna/punycode.h>
|
||||
# include <ada/idna/unicode_transcoding.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
struct PunycodeEncodeImpl
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::u32string value_utf32;
|
||||
std::string value_puny;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t value_length = offsets[row] - prev_offset - 1;
|
||||
|
||||
const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
|
||||
value_utf32.resize(value_utf32_length);
|
||||
ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
|
||||
|
||||
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
|
||||
if (!ok)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding");
|
||||
|
||||
res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
value_utf32.clear();
|
||||
value_puny.clear(); /// utf32_to_punycode() appends to its output string
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function");
|
||||
}
|
||||
};
|
||||
|
||||
struct PunycodeDecodeImpl
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const size_t rows = offsets.size();
|
||||
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
size_t prev_offset = 0;
|
||||
std::u32string value_utf32;
|
||||
std::string value_utf8;
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
const size_t value_length = offsets[row] - prev_offset - 1;
|
||||
|
||||
const std::string_view value_punycode(value, value_length);
|
||||
const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
|
||||
if (!ok)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding");
|
||||
|
||||
const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
|
||||
value_utf8.resize(utf8_length);
|
||||
ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
|
||||
|
||||
res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
|
||||
res_offsets.push_back(res_data.size());
|
||||
|
||||
prev_offset = offsets[row];
|
||||
|
||||
value_utf32.clear(); /// punycode_to_utf32() appends to its output string
|
||||
value_utf8.clear();
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function");
|
||||
}
|
||||
};
|
||||
|
||||
/// Name tag carrying the SQL-visible name of the encoding function.
struct NamePunycodeEncode
{
    static constexpr const char * name = "punycodeEncode";
};
|
||||
|
||||
/// Name tag carrying the SQL-visible name of the decoding function.
struct NamePunycodeDecode
{
    static constexpr const char * name = "punycodeDecode";
};
|
||||
|
||||
/// Registers punycodeEncode() and punycodeDecode() in the function factory, together with their embedded documentation.
REGISTER_FUNCTION(Punycode)
{
    factory.registerFunction<FunctionStringToString<PunycodeEncodeImpl, NamePunycodeEncode>>(FunctionDocumentation{
        .description=R"(
Computes a Punycode representation of a string.)",
        .syntax="punycodeEncode(str)",
        .arguments={{"str", "Input string"}},
        .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
        .examples={
            {"simple",
            "SELECT punycodeEncode('München') AS puny;",
            R"(
┌─puny───────┐
│ Mnchen-3ya │
└────────────┘
            )"
            }}
    });

    /// The description below must describe decoding; it used to be a verbatim copy of the encoder's description.
    factory.registerFunction<FunctionStringToString<PunycodeDecodeImpl, NamePunycodeDecode>>(FunctionDocumentation{
        .description=R"(
Computes the plaintext representation of a Punycode-encoded string.)",
        .syntax="punycodeDecode(str)",
        .arguments={{"str", "A Punycode-encoded string"}},
        .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
        .examples={
            {"simple",
            "SELECT punycodeDecode('Mnchen-3ya') AS plain;",
            R"(
┌─plain───┐
│ München │
└─────────┘
            )"
            }}
    });
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -129,6 +129,9 @@ endif()
|
||||
if (TARGET ch_contrib::sqids)
|
||||
set(USE_SQIDS 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::idna)
|
||||
set(USE_IDNA 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::vectorscan)
|
||||
set(USE_VECTORSCAN 1)
|
||||
endif()
|
||||
|
35
tests/queries/0_stateless/02932_punycode.reference
Normal file
35
tests/queries/0_stateless/02932_punycode.reference
Normal file
@ -0,0 +1,35 @@
|
||||
-- Negative tests
|
||||
-- Regular cases
|
||||
a a- a
|
||||
A A- A
|
||||
-- --- --
|
||||
London London- London
|
||||
Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson
|
||||
This has spaces This has spaces- This has spaces
|
||||
-> $1.00 <- -> $1.00 <-- -> $1.00 <-
|
||||
а 80a а
|
||||
ü tda ü
|
||||
α mxa α
|
||||
例 fsq 例
|
||||
😉 n28h 😉
|
||||
αβγ mxacd αβγ
|
||||
München Mnchen-3ya München
|
||||
Mnchen-3ya Mnchen-3ya- Mnchen-3ya
|
||||
München-Ost Mnchen-Ost-9db München-Ost
|
||||
Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost
|
||||
abæcdöef abcdef-qua4k abæcdöef
|
||||
правда 80aafi6cg правда
|
||||
ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข
|
||||
ドメイン名例 eckwd4c7cu47r2wf ドメイン名例
|
||||
MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前
|
||||
「bücher」 bcher-kva8445foa 「bücher」
|
||||
团淄 3bs854c 团淄
|
||||
-- Special cases
|
||||
|
||||
|
||||
\N
|
||||
\N
|
||||
Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. 
Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
|
||||
München Mnchen-3ya München
|
||||
abc abc- abc
|
||||
aäoöuü aou-qla5gqb aäoöuü
|
63
tests/queries/0_stateless/02932_punycode.sql
Normal file
63
tests/queries/0_stateless/02932_punycode.sql
Normal file
@ -0,0 +1,63 @@
|
||||
-- Tags: no-fasttest
|
||||
-- no-fasttest: requires idna library
|
||||
|
||||
SELECT '-- Negative tests';
|
||||
|
||||
SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
|
||||
SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
|
||||
|
||||
SELECT '-- Regular cases';
|
||||
|
||||
-- The test cases originate from the ada idna unit tests:
|
||||
--- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
|
||||
|
||||
SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
|
||||
SELECT '-- Special cases';
|
||||
|
||||
SELECT punycodeDecode('');
|
||||
SELECT punycodeEncode('');
|
||||
SELECT punycodeDecode(NULL);
|
||||
SELECT punycodeEncode(NULL);
|
||||
|
||||
-- garbage Punycode-encoded values
|
||||
SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
-- long input
|
||||
SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
|
||||
|
||||
-- non-const values
|
||||
DROP TABLE IF EXISTS tab;
|
||||
CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
|
||||
INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
|
||||
SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original FROM tab;
|
||||
DROP TABLE tab;
|
@ -714,6 +714,7 @@ Promtail
|
||||
Protobuf
|
||||
ProtobufSingle
|
||||
ProxySQL
|
||||
Punycode
|
||||
PyArrow
|
||||
PyCharm
|
||||
QEMU
|
||||
@ -2075,6 +2076,8 @@ pseudorandom
|
||||
pseudorandomize
|
||||
psql
|
||||
ptrs
|
||||
punycodeDecode
|
||||
punycodeEncode
|
||||
pushdown
|
||||
pwrite
|
||||
py
|
||||
|
Loading…
Reference in New Issue
Block a user