2021-09-27 15:45:04 +00:00
|
|
|
-- Tags: no-fasttest
|
|
|
|
|
2021-09-05 23:25:22 +00:00
|
|
|
DROP TABLE IF EXISTS normalize_test;
|
2021-09-27 15:45:04 +00:00
|
|
|
CREATE TABLE normalize_test (id int, value String) ENGINE = MergeTree ORDER BY value;
|
|
|
|
|
2021-09-05 23:25:22 +00:00
|
|
|
|
|
|
|
SELECT
|
2021-09-27 15:45:04 +00:00
|
|
|
'ё' AS norm, 'ё' AS denorm,
|
|
|
|
length(norm), length(denorm),
|
|
|
|
normalizeUTF8NFC(norm) AS norm_nfc,
|
|
|
|
normalizeUTF8NFC(denorm) AS denorm_nfc,
|
|
|
|
length(norm_nfc),
|
|
|
|
length(denorm_nfc);
|
|
|
|
|
|
|
|
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (1, 'ё');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (2, 'ё');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (3, 'జ్ఞా');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (4, '本気ですか');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (5, 'ﷺ');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (6, 'ᾂ');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (7, 'ΐ');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (8, 'שּׁ');
|
|
|
|
INSERT INTO normalize_test (id, value) VALUES (9, '𝅘𝅥𝅮');
|
|
|
|
|
2021-09-05 23:25:22 +00:00
|
|
|
|
2021-09-27 15:45:04 +00:00
|
|
|
INSERT INTO normalize_test (id, value) VALUES (10, 'Q̹̣̩̭̰̰̹̄ͬ̿͋̃ṷ̬̰ͥe̘͚͈̰̺̍͐s͎̜̖t͔̣̯̲̜̠ͣ̑ͨ̉̈̈o̲͙̺͊ͯͣ̐̋̂̔ ̳͉͍̒̂è̗ͥͯͨ̍ͮ͛ ̦̹̣̰̐̅̑͑̅̂t͙̭̻̖͛̾e̺͙ͣ͒̚ṣ̠͉͓͔̲̦̎t̖͖̝͓̣ͭ͑̈́̂ỏ̥͕͈͛̓ ̀ͦ̽ͅZͯ̑̎a͆l̻ͨ̋ͧͣͨͬg͉̙̟̾̅̾ͬo̠ͮ͒');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SELECT
|
|
|
|
id, value, length(value),
|
|
|
|
normalizeUTF8NFC(value) AS nfc, length(nfc) AS nfc_len,
|
|
|
|
normalizeUTF8NFD(value) AS nfd, length(nfd) AS nfd_len,
|
|
|
|
normalizeUTF8NFKC(value) AS nfkc, length(nfkc) AS nfkc_len,
|
|
|
|
normalizeUTF8NFKD(value) AS nfkd, length(nfkd) AS nfkd_len
|
|
|
|
FROM normalize_test
|
|
|
|
ORDER BY id;
|
2021-09-05 23:25:22 +00:00
|
|
|
|
|
|
|
|
2024-05-30 12:27:25 +00:00
|
|
|
SELECT char(228) AS value, normalizeUTF8NFC(value); -- { serverError CANNOT_NORMALIZE_STRING }
|
|
|
|
SELECT char(228) AS value, normalizeUTF8NFD(value); -- { serverError CANNOT_NORMALIZE_STRING }
|
|
|
|
SELECT char(228) AS value, normalizeUTF8NFKC(value); -- { serverError CANNOT_NORMALIZE_STRING }
|
|
|
|
SELECT char(228) AS value, normalizeUTF8NFKD(value); -- { serverError CANNOT_NORMALIZE_STRING }
|