# Basic tests for the four normalization functions exported by
# Unicode::Normalize: NFD, NFC, NFKD and NFKC.
#
# The script emits its own TAP ("1..72", then "ok N" / "not ok N") instead
# of using a test module.

# Skip the whole file (plan "1..0") when this perl cannot convert between a
# character and its code point with the 'U' pack template.
BEGIN {
    unless ('A' eq pack('U', 0x41)) {
        print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
        exit 0;
    }
    unless (0x41 == unpack('U', 'A')) {
        print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
        exit 0;
    }
}

# When PERL_CORE is set in the environment, run from the 't' directory (if
# there is one) and load modules from the sibling lib directory only.
BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use strict;
use warnings;

# Unbuffer STDOUT and announce the plan before anything else is printed.
BEGIN { $| = 1; print "1..72\n"; }

# Number of the most recently reported test.
my $count = 0;

# ok($got)            - passes when $got is true.
# ok($got, $expected) - passes when both are undef, or both are defined and
#                       string-equal (eq).
# Prints one "ok N" / "not ok N" line and advances the test counter.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

use Unicode::Normalize;

# Reaching this point means the module loaded.
ok(1);

# Thin wrappers around the module's own pack_U / unpack_U helpers.
# NOTE(review): presumably these convert between a list of code points and
# a string; their implementation is not visible in this file.
sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }

#########################

# Empty and plain "A" inputs must come back unchanged from every form.
ok(NFD(""), "");
ok(NFC(""), "");
ok(NFKD(""), "");
ok(NFKC(""), "");

ok(NFD("A"), "A");
ok(NFC("A"), "A");
ok(NFKD("A"), "A");
ok(NFKC("A"), "A");

# 9

# don't modify the source
# Each pair checks the returned value, then that the argument variable still
# holds its original contents.
my $sNFD = "\x{FA19}";
ok(NFD($sNFD), "\x{795E}");
ok($sNFD, "\x{FA19}");

my $sNFC = "\x{FA1B}";
ok(NFC($sNFC), "\x{798F}");
ok($sNFC, "\x{FA1B}");

my $sNFKD = "\x{FA1E}";
ok(NFKD($sNFKD), "\x{7FBD}");
ok($sNFKD, "\x{FA1E}");

my $sNFKC = "\x{FA26}";
ok(NFKC($sNFKC), "\x{90FD}");
ok($sNFKC, "\x{FA26}");

# 17

# hexNFC($hex_string) / hexNFD($hex_string)
# Take a string of space-separated hexadecimal code points, build a string
# from them with _pack_U, normalize it, and return the result as
# space-separated, zero-padded (at least four digits) upper-case hex.
sub hexNFC {
    join " ", map sprintf("%04X", $_),
        _unpack_U NFC _pack_U map hex, split ' ', shift;
}
sub hexNFD {
    join " ", map sprintf("%04X", $_),
        _unpack_U NFD _pack_U map hex, split ' ', shift;
}

ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");

ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
# Continuation of the hex-notation checks. Each call passes space-separated
# hex code points through hexNFC/hexNFD and compares the hex of the result.
ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");

ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");

# U+0000 inside the string must pass through untouched.
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");

# AC00 followed by 11A8..11C2 is expected to compose into one code point;
# 11A7 and 11C3, just outside that range, are expected to stay separate.
ok(hexNFC("AC00 11A7"), "AC00 11A7");
ok(hexNFC("AC00 11A8"), "AC01");
ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");

# 39

# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
ok(hexNFC("1100 1161 0300"), "AC00 0300");
ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");

# Inputs that begin with 03xx marks (directly, or after U+0000): the
# leading marks are expected to come back in their original order.
ok(hexNFC("0300 0041"), "0300 0041");
ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");

# A leading mark must not stop the following "0061 0300" from composing
# to 00E0.
ok(hexNFC("0327 0061 0300"), "0327 00E0");
ok(hexNFC("0301 0061 0300"), "0301 00E0");
ok(hexNFC("0315 0061 0300"), "0315 00E0");
ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");

# 56

# NFC() and NFKC() should be unary.
# The calls are deliberately written without parentheses: the checks pass
# only if "NFC $str11 eq $str12" parses as "NFC($str11) eq $str12".
my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62);
my $str12 = _pack_U(0x1EA4, 0x62);
ok(NFC $str11 eq $str12);
ok(NFKC $str11 eq $str12);

# NFD() and NFKD() should be unary.
my $str21 = _pack_U(0xE0, 0xAC00);
my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
ok(NFD $str21 eq $str22);
ok(NFKD $str21 eq $str22);

# 60

## Bug #53197: NFKC("\x{2000}") produces...

# The compatibility forms must map these to a plain space / plain 'H'.
ok(NFKC("\x{2002}") eq ' ');
ok(NFKD("\x{2002}") eq ' ');
ok(NFKC("\x{2000}") eq ' ');
ok(NFKD("\x{2000}") eq ' ');

ok(NFKC("\x{210C}") eq 'H');
ok(NFKD("\x{210C}") eq 'H');
ok(NFKC("\x{210D}") eq 'H');
ok(NFKD("\x{210D}") eq 'H');

# U+F907 must become U+9F9C under all four forms.
ok(NFC("\x{F907}") eq "\x{9F9C}");
ok(NFD("\x{F907}") eq "\x{9F9C}");
ok(NFKC("\x{F907}") eq "\x{9F9C}");
ok(NFKD("\x{F907}") eq "\x{9F9C}");

# 72
