xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/nonchar.t (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1
2BEGIN {
3    unless ("A" eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate " .
5	    "cannot stringify a Unicode code point\n";
6	exit 0;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14
15BEGIN {
16    use Unicode::Collate;
17
18    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
19	print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
20		" needed for this test\n";
21	print $@;
22	exit;
23    }
24}
25
26use strict;
27use warnings;
28BEGIN { $| = 1; print "1..90\n"; }
29my $count = 0;
30sub ok ($;$) {
31    my $p = my $r = shift;
32    if (@_) {
33	my $x = shift;
34	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
35    }
36    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
37}
38
39ok(1);
40
41#########################
42
43no warnings 'utf8';
44
45# Unicode 6.0 Sorting
46#
47# Special Database Values. The data files for CLDR provide
48# special weights for two noncharacters:
49#
50# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range
51#    in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings
52#    starting with "sch" plus those that sort equivalently.
53# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields,
54#    allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John".
55
56my $entry = <<'ENTRIES';
57FFFE  ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE>
58FFFF  ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF>
59ENTRIES
60
61my @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva");
62my @dsf = map "$_\x{FFFE}Fred", @disilva;
63my @dsj = map "$_\x{FFFE}John", @disilva;
64my @dsJ = map        "$_ John", @disilva;
65
66for my $norm (undef, 'NFD') {
67    if (defined $norm) {
68	eval { require Unicode::Normalize };
69	if ($@) {
70	    ok(1) for 1..34; # silent skip
71	    next;
72	}
73    }
74
75    my $coll = Unicode::Collate->new(
76	table => 'keys.txt',
77	level => 1,
78	normalization => $norm,
79	UCA_Version => 22,
80	entry => $entry,
81    );
82
83    # 1..4
84    ok($coll->lt("\x{FFFD}",   "\x{FFFF}"));
85    ok($coll->lt("\x{1FFFD}",  "\x{1FFFF}"));
86    ok($coll->lt("\x{2FFFD}",  "\x{2FFFF}"));
87    ok($coll->lt("\x{10FFFD}", "\x{10FFFF}"));
88
89    # 5..14
90    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
91    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
92    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
93    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
94    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
95    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
96    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
97    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
98    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
99    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
100
101    # 15..16
102    ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}"));
103    ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}"));
104
105    $coll->change(level => 4);
106
107    # 17..25
108    for my $i (0 .. $#disilva - 1) {
109	ok($coll->lt($dsf[$i], $dsf[$i+1]));
110	ok($coll->lt($dsj[$i], $dsj[$i+1]));
111	ok($coll->lt($dsJ[$i], $dsJ[$i+1]));
112    }
113
114    # 26
115    ok($coll->lt($dsf[-1], $dsj[0]));
116
117    $coll->change(level => 1);
118
119    # 27..34
120    for my $i (0 .. $#disilva) {
121	ok($coll->lt($dsf[$i], $dsJ[$i]));
122	ok($coll->lt($dsj[$i], $dsJ[$i]));
123    }
124}
125
126# 69
127
128{
129    my $coll = Unicode::Collate->new(
130	table => 'keys.txt',
131	normalization => undef,
132	highestFFFF => 1,
133	minimalFFFE => 1,
134    );
135
136    $coll->change(level => 1);
137    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
138    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
139    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
140    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
141    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
142    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
143    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
144    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
145    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
146    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
147
148# 79
149
150    $coll->change(level => 3);
151    my @list = (
152	"ab\x{FFFE}a",
153	"Ab\x{FFFE}a",
154	"ab\x{FFFE}c",
155	"Ab\x{FFFE}c",
156	"ab\x{FFFE}xyz",
157	"abc\x{FFFE}def",
158	"abc\x{FFFE}xYz",
159	"aBc\x{FFFE}xyz",
160	"abcX\x{FFFE}def",
161	"abcx\x{FFFE}xyz",
162	"b\x{FFFE}aaa",
163	"bbb\x{FFFE}a",
164    );
165    my $p = shift @list;
166    for my $c (@list) {
167	ok($coll->lt($p, $c));
168	$p = $c;
169    }
170}
171
172# 90
173