xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/notable.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
191f110e0Safresh1
291f110e0Safresh1BEGIN {
391f110e0Safresh1    if ($ENV{PERL_CORE}) {
491f110e0Safresh1	chdir('t') if -d 't';
591f110e0Safresh1	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
691f110e0Safresh1    }
791f110e0Safresh1}
891f110e0Safresh1
991f110e0Safresh1use strict;
1091f110e0Safresh1use warnings;
1191f110e0Safresh1BEGIN { $| = 1; print "1..32\n"; }
1291f110e0Safresh1my $count = 0;
1391f110e0Safresh1sub ok ($;$) {
1491f110e0Safresh1    my $p = my $r = shift;
1591f110e0Safresh1    if (@_) {
1691f110e0Safresh1	my $x = shift;
1791f110e0Safresh1	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
1891f110e0Safresh1    }
1991f110e0Safresh1    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
2091f110e0Safresh1}
2191f110e0Safresh1
2291f110e0Safresh1use Unicode::Collate;
2391f110e0Safresh1
2491f110e0Safresh1ok(1);
2591f110e0Safresh1
26*256a93a4Safresh1sub _pack_U   { Unicode::Collate::pack_U(@_) }
27*256a93a4Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28*256a93a4Safresh1
2991f110e0Safresh1#########################
3091f110e0Safresh1
3191f110e0Safresh1{
3291f110e0Safresh1    # Table is undefined, then no entry is defined.
3391f110e0Safresh1    my $undef_table = Unicode::Collate->new(
3491f110e0Safresh1	table => undef,
3591f110e0Safresh1	normalization => undef,
3691f110e0Safresh1	level => 1,
3791f110e0Safresh1    );
3891f110e0Safresh1
3991f110e0Safresh1    # in the Unicode code point order
4091f110e0Safresh1    ok($undef_table->lt('', 'A'));
4191f110e0Safresh1    ok($undef_table->lt('ABC', 'B'));
4291f110e0Safresh1
4391f110e0Safresh1    # Hangul should be decomposed (even w/o Unicode::Normalize).
4491f110e0Safresh1    ok($undef_table->lt("Perl", "\x{AC00}"));
4591f110e0Safresh1    ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
4691f110e0Safresh1    ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
4791f110e0Safresh1    ok($undef_table->lt("\x{AE00}", "\x{3042}"));
4891f110e0Safresh1
4991f110e0Safresh1    # U+AC00: Hangul GA
5091f110e0Safresh1    # U+AE00: Hangul GEUL
5191f110e0Safresh1    # U+3042: Hiragana A
5291f110e0Safresh1
5391f110e0Safresh1    # Weight for CJK Ideographs is defined, though.
5491f110e0Safresh1    ok($undef_table->lt("", "\x{4E00}"));
5591f110e0Safresh1    ok($undef_table->lt("\x{4E8C}","ABC"));
5691f110e0Safresh1    ok($undef_table->lt("\x{4E00}","\x{3042}"));
5791f110e0Safresh1    ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
5891f110e0Safresh1
5991f110e0Safresh1# 11
6091f110e0Safresh1
6191f110e0Safresh1    # U+4E00: Ideograph "ONE"
6291f110e0Safresh1    # U+4E8C: Ideograph "TWO"
6391f110e0Safresh1
6491f110e0Safresh1    for my $v ('', 8, 9, 11, 14) {
6591f110e0Safresh1	$undef_table->change(UCA_Version => $v) if $v;
6691f110e0Safresh1	ok($undef_table->lt("\x{4E00}","\0"));
6791f110e0Safresh1    }
6891f110e0Safresh1}
6991f110e0Safresh1
7091f110e0Safresh1# 16
7191f110e0Safresh1
7291f110e0Safresh1{
7391f110e0Safresh1    my $onlyABC = Unicode::Collate->new(
7491f110e0Safresh1	table => undef,
7591f110e0Safresh1	normalization => undef,
7691f110e0Safresh1	entry => << 'ENTRIES',
7791f110e0Safresh10061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
7891f110e0Safresh10041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
7991f110e0Safresh10062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
8091f110e0Safresh10042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
8191f110e0Safresh10063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
8291f110e0Safresh10043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
8391f110e0Safresh1ENTRIES
8491f110e0Safresh1    );
8591f110e0Safresh1    ok(
8691f110e0Safresh1	join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
8791f110e0Safresh1	join(':',                 qw/ A aB Ab ABA BAC cAc cc / ),
8891f110e0Safresh1    );
8991f110e0Safresh1}
9091f110e0Safresh1
9191f110e0Safresh1# 17
9291f110e0Safresh1
9391f110e0Safresh1{
9491f110e0Safresh1    my $few_entries = Unicode::Collate->new(
9591f110e0Safresh1	entry => <<'ENTRIES',
9691f110e0Safresh10050 ; [.0101.0020.0002.0050]  # P
9791f110e0Safresh10045 ; [.0102.0020.0002.0045]  # E
9891f110e0Safresh10052 ; [.0103.0020.0002.0052]  # R
9991f110e0Safresh1004C ; [.0104.0020.0002.004C]  # L
10091f110e0Safresh11100 ; [.0105.0020.0002.1100]  # Hangul Jamo initial G
10191f110e0Safresh11175 ; [.0106.0020.0002.1175]  # Hangul Jamo middle I
10291f110e0Safresh15B57 ; [.0107.0020.0002.5B57]  # CJK Ideograph "Letter"
10391f110e0Safresh1ENTRIES
10491f110e0Safresh1	table => undef,
10591f110e0Safresh1	normalization => undef,
10691f110e0Safresh1    );
10791f110e0Safresh1    # defined before undefined
10891f110e0Safresh1    my $sortABC = join '',
10991f110e0Safresh1	$few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
11091f110e0Safresh1
11191f110e0Safresh1    ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
11291f110e0Safresh1
11391f110e0Safresh1    ok($few_entries->lt('E', 'D'));
11491f110e0Safresh1    ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
11591f110e0Safresh1    ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
11691f110e0Safresh1
11791f110e0Safresh1    # Hangul must be decomposed.
11891f110e0Safresh1    ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
11991f110e0Safresh1}
12091f110e0Safresh1
12191f110e0Safresh1# 22
12291f110e0Safresh1
12391f110e0Safresh1{
12491f110e0Safresh1    my $highestNUL = Unicode::Collate->new(
12591f110e0Safresh1	table => undef,
12691f110e0Safresh1	normalization => undef,
12791f110e0Safresh1	level => 1,
12891f110e0Safresh1	entry => '0000 ; [.FFFE.0020.0005.0000]',
12991f110e0Safresh1    );
13091f110e0Safresh1
13191f110e0Safresh1    for my $v ('', 8, 9, 11, 14) {
13291f110e0Safresh1	$highestNUL->change(UCA_Version => $v) if $v;
13391f110e0Safresh1	ok($highestNUL->lt("abc\x{4E00}", "abc\0"));
13491f110e0Safresh1	ok($highestNUL->lt("abc\x{E0000}","abc\0"));
13591f110e0Safresh1    }
13691f110e0Safresh1}
13791f110e0Safresh1
13891f110e0Safresh1# 32
139