xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/nonchar.t (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
1898184e3Ssthen
2898184e3SsthenBEGIN {
3b8851fccSafresh1    unless (5.008 <= $]) {
4b8851fccSafresh1	print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n";
5898184e3Ssthen	print $@;
6898184e3Ssthen	exit;
7898184e3Ssthen    }
8*eac174f2Safresh1    if ($ENV{PERL_CORE}) {
9*eac174f2Safresh1	chdir('t') if -d 't';
10*eac174f2Safresh1	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11*eac174f2Safresh1    }
12898184e3Ssthen}
13898184e3Ssthen
14898184e3Ssthenuse strict;
15898184e3Ssthenuse warnings;
1691f110e0Safresh1BEGIN { $| = 1; print "1..90\n"; }
17898184e3Ssthenmy $count = 0;
18898184e3Ssthensub ok ($;$) {
19898184e3Ssthen    my $p = my $r = shift;
20898184e3Ssthen    if (@_) {
21898184e3Ssthen	my $x = shift;
22898184e3Ssthen	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23898184e3Ssthen    }
24898184e3Ssthen    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25898184e3Ssthen}
26898184e3Ssthen
27*eac174f2Safresh1use Unicode::Collate;
28*eac174f2Safresh1
29898184e3Ssthenok(1);
30898184e3Ssthen
31*eac174f2Safresh1sub _pack_U   { Unicode::Collate::pack_U(@_) }
32*eac174f2Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) }
33*eac174f2Safresh1
34898184e3Ssthen#########################
35898184e3Ssthen
36898184e3Ssthenno warnings 'utf8';
37898184e3Ssthen
38898184e3Ssthen# Unicode 6.0 Sorting
39898184e3Ssthen#
40898184e3Ssthen# Special Database Values. The data files for CLDR provide
41898184e3Ssthen# special weights for two noncharacters:
42898184e3Ssthen#
43898184e3Ssthen# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range
44898184e3Ssthen#    in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings
45898184e3Ssthen#    starting with "sch" plus those that sort equivalently.
46898184e3Ssthen# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields,
47898184e3Ssthen#    allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John".
48898184e3Ssthen
49898184e3Ssthenmy $entry = <<'ENTRIES';
5091f110e0Safresh1FFFE  ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE>
51898184e3SsthenFFFF  ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF>
52898184e3SsthenENTRIES
53898184e3Ssthen
54898184e3Ssthenmy @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva");
55898184e3Ssthenmy @dsf = map "$_\x{FFFE}Fred", @disilva;
56898184e3Ssthenmy @dsj = map "$_\x{FFFE}John", @disilva;
57898184e3Ssthenmy @dsJ = map        "$_ John", @disilva;
58898184e3Ssthen
59898184e3Ssthenfor my $norm (undef, 'NFD') {
60898184e3Ssthen    if (defined $norm) {
61898184e3Ssthen	eval { require Unicode::Normalize };
62898184e3Ssthen	if ($@) {
6391f110e0Safresh1	    ok(1) for 1..34; # silent skip
64898184e3Ssthen	    next;
65898184e3Ssthen	}
66898184e3Ssthen    }
67898184e3Ssthen
68898184e3Ssthen    my $coll = Unicode::Collate->new(
69898184e3Ssthen	table => 'keys.txt',
70898184e3Ssthen	level => 1,
71898184e3Ssthen	normalization => $norm,
72898184e3Ssthen	UCA_Version => 22,
73898184e3Ssthen	entry => $entry,
74898184e3Ssthen    );
75898184e3Ssthen
76898184e3Ssthen    # 1..4
77898184e3Ssthen    ok($coll->lt("\x{FFFD}",   "\x{FFFF}"));
78898184e3Ssthen    ok($coll->lt("\x{1FFFD}",  "\x{1FFFF}"));
79898184e3Ssthen    ok($coll->lt("\x{2FFFD}",  "\x{2FFFF}"));
80898184e3Ssthen    ok($coll->lt("\x{10FFFD}", "\x{10FFFF}"));
81898184e3Ssthen
82898184e3Ssthen    # 5..14
83898184e3Ssthen    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
84898184e3Ssthen    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
85898184e3Ssthen    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
86898184e3Ssthen    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
87898184e3Ssthen    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
88898184e3Ssthen    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
89898184e3Ssthen    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
90898184e3Ssthen    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
91898184e3Ssthen    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
92898184e3Ssthen    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
93898184e3Ssthen
94898184e3Ssthen    # 15..16
95898184e3Ssthen    ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}"));
96898184e3Ssthen    ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}"));
97898184e3Ssthen
98898184e3Ssthen    $coll->change(level => 4);
99898184e3Ssthen
100898184e3Ssthen    # 17..25
101898184e3Ssthen    for my $i (0 .. $#disilva - 1) {
102898184e3Ssthen	ok($coll->lt($dsf[$i], $dsf[$i+1]));
103898184e3Ssthen	ok($coll->lt($dsj[$i], $dsj[$i+1]));
104898184e3Ssthen	ok($coll->lt($dsJ[$i], $dsJ[$i+1]));
105898184e3Ssthen    }
106898184e3Ssthen
107898184e3Ssthen    # 26
108898184e3Ssthen    ok($coll->lt($dsf[-1], $dsj[0]));
109898184e3Ssthen
11091f110e0Safresh1    $coll->change(level => 1);
11191f110e0Safresh1
11291f110e0Safresh1    # 27..34
113898184e3Ssthen    for my $i (0 .. $#disilva) {
11491f110e0Safresh1	ok($coll->lt($dsf[$i], $dsJ[$i]));
115898184e3Ssthen	ok($coll->lt($dsj[$i], $dsJ[$i]));
116898184e3Ssthen    }
117898184e3Ssthen}
118898184e3Ssthen
11991f110e0Safresh1# 69
12091f110e0Safresh1
12191f110e0Safresh1{
12291f110e0Safresh1    my $coll = Unicode::Collate->new(
12391f110e0Safresh1	table => 'keys.txt',
12491f110e0Safresh1	normalization => undef,
12591f110e0Safresh1	highestFFFF => 1,
12691f110e0Safresh1	minimalFFFE => 1,
12791f110e0Safresh1    );
12891f110e0Safresh1
12991f110e0Safresh1    $coll->change(level => 1);
13091f110e0Safresh1    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
13191f110e0Safresh1    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
13291f110e0Safresh1    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
13391f110e0Safresh1    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
13491f110e0Safresh1    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
13591f110e0Safresh1    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
13691f110e0Safresh1    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
13791f110e0Safresh1    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
13891f110e0Safresh1    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
13991f110e0Safresh1    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
14091f110e0Safresh1
14191f110e0Safresh1# 79
14291f110e0Safresh1
14391f110e0Safresh1    $coll->change(level => 3);
14491f110e0Safresh1    my @list = (
14591f110e0Safresh1	"ab\x{FFFE}a",
14691f110e0Safresh1	"Ab\x{FFFE}a",
14791f110e0Safresh1	"ab\x{FFFE}c",
14891f110e0Safresh1	"Ab\x{FFFE}c",
14991f110e0Safresh1	"ab\x{FFFE}xyz",
15091f110e0Safresh1	"abc\x{FFFE}def",
15191f110e0Safresh1	"abc\x{FFFE}xYz",
15291f110e0Safresh1	"aBc\x{FFFE}xyz",
15391f110e0Safresh1	"abcX\x{FFFE}def",
15491f110e0Safresh1	"abcx\x{FFFE}xyz",
15591f110e0Safresh1	"b\x{FFFE}aaa",
15691f110e0Safresh1	"bbb\x{FFFE}a",
15791f110e0Safresh1    );
15891f110e0Safresh1    my $p = shift @list;
15991f110e0Safresh1    for my $c (@list) {
16091f110e0Safresh1	ok($coll->lt($p, $c));
16191f110e0Safresh1	$p = $c;
16291f110e0Safresh1    }
16391f110e0Safresh1}
16491f110e0Safresh1
16591f110e0Safresh1# 90
166