xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/nonchar.t (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17
18BEGIN {
19    use Unicode::Collate;
20
21    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
22	print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
23		" needed for this test\n";
24	print $@;
25	exit;
26    }
27}
28
29use strict;
30use warnings;
31BEGIN { $| = 1; print "1..90\n"; }
32my $count = 0;
33sub ok ($;$) {
34    my $p = my $r = shift;
35    if (@_) {
36	my $x = shift;
37	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
38    }
39    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
40}
41
42ok(1);
43
44#########################
45
46no warnings 'utf8';
47
48# Unicode 6.0 Sorting
49#
50# Special Database Values. The data files for CLDR provide
51# special weights for two noncharacters:
52#
53# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range
54#    in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings
55#    starting with "sch" plus those that sort equivalently.
56# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields,
57#    allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John".
58
59my $entry = <<'ENTRIES';
60FFFE  ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE>
61FFFF  ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF>
62ENTRIES
63
64my @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva");
65my @dsf = map "$_\x{FFFE}Fred", @disilva;
66my @dsj = map "$_\x{FFFE}John", @disilva;
67my @dsJ = map        "$_ John", @disilva;
68
69for my $norm (undef, 'NFD') {
70    if (defined $norm) {
71	eval { require Unicode::Normalize };
72	if ($@) {
73	    ok(1) for 1..34; # silent skip
74	    next;
75	}
76    }
77
78    my $coll = Unicode::Collate->new(
79	table => 'keys.txt',
80	level => 1,
81	normalization => $norm,
82	UCA_Version => 22,
83	entry => $entry,
84    );
85
86    # 1..4
87    ok($coll->lt("\x{FFFD}",   "\x{FFFF}"));
88    ok($coll->lt("\x{1FFFD}",  "\x{1FFFF}"));
89    ok($coll->lt("\x{2FFFD}",  "\x{2FFFF}"));
90    ok($coll->lt("\x{10FFFD}", "\x{10FFFF}"));
91
92    # 5..14
93    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
94    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
95    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
96    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
97    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
98    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
99    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
100    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
101    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
102    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
103
104    # 15..16
105    ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}"));
106    ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}"));
107
108    $coll->change(level => 4);
109
110    # 17..25
111    for my $i (0 .. $#disilva - 1) {
112	ok($coll->lt($dsf[$i], $dsf[$i+1]));
113	ok($coll->lt($dsj[$i], $dsj[$i+1]));
114	ok($coll->lt($dsJ[$i], $dsJ[$i+1]));
115    }
116
117    # 26
118    ok($coll->lt($dsf[-1], $dsj[0]));
119
120    $coll->change(level => 1);
121
122    # 27..34
123    for my $i (0 .. $#disilva) {
124	ok($coll->lt($dsf[$i], $dsJ[$i]));
125	ok($coll->lt($dsj[$i], $dsJ[$i]));
126    }
127}
128
129# 69
130
131{
132    my $coll = Unicode::Collate->new(
133	table => 'keys.txt',
134	normalization => undef,
135	highestFFFF => 1,
136	minimalFFFE => 1,
137    );
138
139    $coll->change(level => 1);
140    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
141    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
142    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
143    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
144    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
145    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
146    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
147    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
148    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
149    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
150
151# 79
152
153    $coll->change(level => 3);
154    my @list = (
155	"ab\x{FFFE}a",
156	"Ab\x{FFFE}a",
157	"ab\x{FFFE}c",
158	"Ab\x{FFFE}c",
159	"ab\x{FFFE}xyz",
160	"abc\x{FFFE}def",
161	"abc\x{FFFE}xYz",
162	"aBc\x{FFFE}xyz",
163	"abcX\x{FFFE}def",
164	"abcx\x{FFFE}xyz",
165	"b\x{FFFE}aaa",
166	"bbb\x{FFFE}a",
167    );
168    my $p = shift @list;
169    for my $c (@list) {
170	ok($coll->lt($p, $c));
171	$p = $c;
172    }
173}
174
175# 90
176