xref: /openbsd-src/gnu/usr.bin/perl/dist/Unicode-Normalize/t/split.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
15759b3d2Safresh1
25759b3d2Safresh1BEGIN {
35759b3d2Safresh1    if ($ENV{PERL_CORE}) {
45759b3d2Safresh1        chdir('t') if -d 't';
55759b3d2Safresh1        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
65759b3d2Safresh1    }
75759b3d2Safresh1}
85759b3d2Safresh1
95759b3d2Safresh1BEGIN {
105759b3d2Safresh1    unless (5.006001 <= $]) {
115759b3d2Safresh1	print "1..0 # skipped: Perl 5.6.1 or later".
125759b3d2Safresh1		" needed for this test\n";
135759b3d2Safresh1	exit;
145759b3d2Safresh1    }
155759b3d2Safresh1}
165759b3d2Safresh1
175759b3d2Safresh1#########################
185759b3d2Safresh1
195759b3d2Safresh1use strict;
205759b3d2Safresh1use warnings;
215759b3d2Safresh1BEGIN { $| = 1; print "1..34\n"; }
225759b3d2Safresh1my $count = 0;
23*256a93a4Safresh1sub ok { Unicode::Normalize::ok(\$count, @_) }
245759b3d2Safresh1
255759b3d2Safresh1use Unicode::Normalize qw(:all);
265759b3d2Safresh1
275759b3d2Safresh1ok(1);
285759b3d2Safresh1
29*256a93a4Safresh1sub _pack_U   { Unicode::Normalize::dot_t_pack_U(@_) }
30*256a93a4Safresh1sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) }
315759b3d2Safresh1
325759b3d2Safresh1#########################
335759b3d2Safresh1
345759b3d2Safresh1our $proc;    # before the last starter
355759b3d2Safresh1our $unproc;  # the last starter and after
365759b3d2Safresh1# If string has no starter, entire string is set to $unproc.
375759b3d2Safresh1
385759b3d2Safresh1($proc, $unproc) = splitOnLastStarter("");
395759b3d2Safresh1ok($proc,   "");
405759b3d2Safresh1ok($unproc, "");
415759b3d2Safresh1
425759b3d2Safresh1($proc, $unproc) = splitOnLastStarter("A");
435759b3d2Safresh1ok($proc,   "");
445759b3d2Safresh1ok($unproc, "A");
455759b3d2Safresh1
465759b3d2Safresh1($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
475759b3d2Safresh1ok($proc,   _pack_U(0x41, 0x300, 0x327));
485759b3d2Safresh1ok($unproc, "B");
495759b3d2Safresh1
505759b3d2Safresh1($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
515759b3d2Safresh1ok($proc,   _pack_U(0x4E00));
525759b3d2Safresh1ok($unproc, _pack_U(0x41, 0x301));
535759b3d2Safresh1
545759b3d2Safresh1($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
555759b3d2Safresh1ok($proc,   "");
565759b3d2Safresh1ok($unproc, _pack_U(0x302, 0x301, 0x300));
575759b3d2Safresh1
585759b3d2Safresh1our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
595759b3d2Safresh1our $dakuten  = _pack_U(0x3099);
605759b3d2Safresh1our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
615759b3d2Safresh1
625759b3d2Safresh1our ($p, $u) = splitOnLastStarter($ka_grave);
635759b3d2Safresh1our $concat = $p . NFC($u.$dakuten);
645759b3d2Safresh1
655759b3d2Safresh1ok(NFC($ka_grave.$dakuten) eq $ga_grave);
665759b3d2Safresh1ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
675759b3d2Safresh1ok($concat eq $ga_grave);
685759b3d2Safresh1
695759b3d2Safresh1# 14
705759b3d2Safresh1
715759b3d2Safresh1sub arraynorm {
725759b3d2Safresh1    my $form   = shift;
735759b3d2Safresh1    my @string = @_;
745759b3d2Safresh1    my $result = "";
755759b3d2Safresh1    my $unproc = "";
765759b3d2Safresh1    foreach my $str (@string) {
775759b3d2Safresh1        $unproc .= $str;
785759b3d2Safresh1        my $n = normalize($form, $unproc);
795759b3d2Safresh1        my($p, $u) = splitOnLastStarter($n);
805759b3d2Safresh1        $result .= $p;
815759b3d2Safresh1        $unproc  = $u;
825759b3d2Safresh1    }
835759b3d2Safresh1    $result .= $unproc;
845759b3d2Safresh1    return $result;
855759b3d2Safresh1}
865759b3d2Safresh1
875759b3d2Safresh1my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
885759b3d2Safresh1my $strC = "\x{3CE}\x{AC01}\x{AC03}";
895759b3d2Safresh1my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
905759b3d2Safresh1my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
915759b3d2Safresh1ok($strC eq NFC($strD));
925759b3d2Safresh1ok($strD eq join('', @str1));
935759b3d2Safresh1ok($strC eq arraynorm('NFC', @str1));
945759b3d2Safresh1ok($strD eq join('', @str2));
955759b3d2Safresh1ok($strC eq arraynorm('NFC', @str2));
965759b3d2Safresh1
975759b3d2Safresh1my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
985759b3d2Safresh1my $strX =  "\x{300}\x{AC01}";
995759b3d2Safresh1ok($strX eq NFC(join('', @strX)));
1005759b3d2Safresh1ok($strX eq arraynorm('NFC', @strX));
1015759b3d2Safresh1ok($strX eq NFKC(join('', @strX)));
1025759b3d2Safresh1ok($strX eq arraynorm('NFKC', @strX));
1035759b3d2Safresh1
1045759b3d2Safresh1my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
1055759b3d2Safresh1my $strY = ("\x{304C}\x{0323}\x{0308}");
1065759b3d2Safresh1ok($strY eq NFC(join('', @strY)));
1075759b3d2Safresh1ok($strY eq arraynorm('NFC', @strY));
1085759b3d2Safresh1ok($strY eq NFKC(join('', @strY)));
1095759b3d2Safresh1ok($strY eq arraynorm('NFKC', @strY));
1105759b3d2Safresh1
1115759b3d2Safresh1my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
1125759b3d2Safresh1my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
1135759b3d2Safresh1ok($strZ eq NFD(join('', @strZ)));
1145759b3d2Safresh1ok($strZ eq arraynorm('NFD', @strZ));
1155759b3d2Safresh1ok($strZ eq NFKD(join('', @strZ)));
1165759b3d2Safresh1ok($strZ eq arraynorm('NFKD', @strZ));
1175759b3d2Safresh1
1185759b3d2Safresh1# 31
1195759b3d2Safresh1
1205759b3d2Safresh1# don't modify the source
1215759b3d2Safresh1
1225759b3d2Safresh1my $source = "ABC";
1235759b3d2Safresh1($proc, $unproc) = splitOnLastStarter($source);
1245759b3d2Safresh1ok($proc,   "AB");
1255759b3d2Safresh1ok($unproc, "C");
1265759b3d2Safresh1ok($source, "ABC");
1275759b3d2Safresh1
1285759b3d2Safresh1# 34
1295759b3d2Safresh1
130